[VLM] Florence-2 supports online serving #16164

Merged · 3 commits · Apr 7, 2025

7 changes: 7 additions & 0 deletions examples/template_florence2.jinja
@@ -0,0 +1,7 @@
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- message['content'] -}}
{%- elif message['role'] == 'assistant' -%}
{{- message['content'] -}}
{%- endif -%}
{%- endfor -%}
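
For reference, the template just concatenates the raw user and assistant message contents and adds no special tokens of its own. A minimal sketch of how it renders, using `jinja2` directly with a made-up single-turn conversation (the `<CAPTION>` task prompt is only an example):

```python
from jinja2 import Template

# Same template as above, inlined as a single string for a self-contained demo.
template_source = (
    "{%- for message in messages -%}"
    "{%- if message['role'] == 'user' -%}{{- message['content'] -}}"
    "{%- elif message['role'] == 'assistant' -%}{{- message['content'] -}}"
    "{%- endif -%}"
    "{%- endfor -%}"
)

messages = [{"role": "user", "content": "<CAPTION>"}]
prompt = Template(template_source).render(messages=messages)
print(repr(prompt))  # -> '<CAPTION>'
```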
4 changes: 2 additions & 2 deletions vllm/entrypoints/chat_utils.py
@@ -487,8 +487,8 @@ def _placeholder_str(self, modality: ModalityStr,
return "<|endoftext10|>" # 200010 (see vocab.json in hf model)
if model_type in ("minicpmo", "minicpmv"):
return "(<image>./</image>)"
if model_type in ("blip-2", "fuyu", "paligemma", "pixtral",
"mistral3"):
if model_type in ("blip-2", "florence2", "fuyu", "paligemma",
"pixtral", "mistral3"):
# These models do not use image tokens in the prompt
return None
if model_type == "qwen":
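
Since `_placeholder_str` now returns `None` for `florence2`, the text part of an online-serving request carries only the task prompt; the image is attached as its own content part rather than via an `<image>` placeholder token. A hedged sketch of such a request, assuming a server started with something like `vllm serve microsoft/Florence-2-large --chat-template examples/template_florence2.jinja --trust-remote-code` (model name, flags, and the image URL below are illustrative):

```python
from openai import OpenAI

# Assumes a vLLM OpenAI-compatible server is already running locally.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="microsoft/Florence-2-large",
    messages=[{
        "role": "user",
        "content": [
            # No <image> placeholder in the text: the image is passed
            # as a separate content part.
            {"type": "text", "text": "<CAPTION>"},
            {"type": "image_url",
             "image_url": {"url": "https://example.com/cat.jpg"}},
        ],
    }],
)
print(response.choices[0].message.content)
```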
14 changes: 13 additions & 1 deletion vllm/model_executor/models/florence2.py
@@ -10,7 +10,7 @@
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
-from transformers import BatchFeature, PretrainedConfig
+from transformers import BartTokenizer, BatchFeature, PretrainedConfig

from vllm.config import VllmConfig
from vllm.model_executor.layers.logits_processor import LogitsProcessor
@@ -826,6 +826,18 @@ def create_decoder_prompt(
) -> Union[str, list[int]]:
return [self.info.get_hf_config().eos_token_id]

def _apply_hf_processor_tokens_only(
self,
prompt_tokens: list[int],
) -> list[int]:
hf_processor = self.info.get_hf_processor()
tokenizer: BartTokenizer = hf_processor.tokenizer
prompt_text = tokenizer.decode(prompt_tokens)
# convert task tokens to prompt
prompt_text = hf_processor._construct_prompts([prompt_text])[0]
prompt_tokens = tokenizer.encode(prompt_text, add_special_tokens=False)
return prompt_tokens

def _call_hf_processor(
self,
prompt: str,
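
The new `_apply_hf_processor_tokens_only` hook round-trips token IDs through the tokenizer so that Florence-2 task tokens are expanded into the full natural-language prompts the model expects, via the HF processor's `_construct_prompts`. This way a token-only prompt (as produced by the chat path) is expanded the same way a plain-text prompt would be. A rough standalone sketch of the same transformation, assuming the `microsoft/Florence-2-large` processor from `transformers` (the exact expanded wording depends on the processor's task map):

```python
from transformers import AutoProcessor

# Illustrative only: trust_remote_code is needed for this repository
# at the time of writing.
processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-large", trust_remote_code=True)
tokenizer = processor.tokenizer

prompt_tokens = tokenizer.encode("<CAPTION>", add_special_tokens=False)
prompt_text = tokenizer.decode(prompt_tokens)

# The processor rewrites the task token into its full prompt,
# e.g. "<CAPTION>" -> "What does the image describe?"
expanded = processor._construct_prompts([prompt_text])[0]
new_tokens = tokenizer.encode(expanded, add_special_tokens=False)
print(expanded, new_tokens)
```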