1 parent 17dd222 commit 703cccb
vllm/entrypoints/llm.py
@@ -536,6 +536,16 @@ def sort_beams_key(x: BeamSearchSequence) -> float:
                                          tokenizer.eos_token_id,
                                          length_penalty)
 
+        # TODO - fix handling of multimodal data for beam search; we pass it
+        # through in the async version on the abstract EngineClient, but not
+        # here.
+        if any("multi_modal_data" in prompt
+               and prompt["multi_modal_data"] is not None
+               for prompt in prompts):
+            logger.warning(
+                "Multimodal data appears to have been provided, but is not"
+                " currently being passed through in LLM.beam_search()!")
+
         tokenizer = self.get_tokenizer()
         # generate 2 * beam_width candidates at each step
         # following the huggingface transformers implementation
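For context, here is a minimal usage sketch of how the new warning would surface. Only the multi_modal_data check comes from the diff above; the model name, image file, import path, and exact BeamSearchParams signature are assumptions for illustration and may differ across vLLM versions.

# Minimal sketch, not from this commit: passing a prompt dict that carries
# "multi_modal_data" to LLM.beam_search() logs the warning added above,
# because the synchronous path currently drops the multimodal payload.
# Model name, image path, and BeamSearchParams fields are illustrative only.
from PIL import Image

from vllm import LLM
from vllm.sampling_params import BeamSearchParams

llm = LLM(model="llava-hf/llava-1.5-7b-hf")  # assumed multimodal-capable model

prompts = [{
    "prompt": "USER: <image>\nDescribe the picture. ASSISTANT:",
    "multi_modal_data": {"image": Image.open("example.jpg")},  # triggers the check
}]

# "multi_modal_data" is present and not None, so logger.warning() fires;
# beam search still runs, but only over the text portion of the prompt.
outputs = llm.beam_search(prompts, BeamSearchParams(beam_width=4, max_tokens=64))
for out in outputs:
    # BeamSearchSequence exposes at least .tokens (see sort_beams_key above).
    print(out.sequences[0].tokens)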