Support processors that do not return BatchFeature

lgeiger · lgeiger · commit a25576c3fd63 · 2025-05-29T11:43:36.000+01:00
Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
diff --git a/vllm/inputs/registry.py b/vllm/inputs/registry.py
@@ -1,16 +1,20 @@
 # SPDX-License-Identifier: Apache-2.0
 from collections.abc import Mapping
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union
+from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union, cast
 
 from transformers import BatchFeature, PretrainedConfig, ProcessorMixin
 from typing_extensions import TypeVar
 
+from vllm.jsontree import JSONTree, json_map_leaves
+from vllm.logger import init_logger
 from vllm.transformers_utils.processor import cached_processor_from_config
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import resolve_mm_processor_kwargs
 
 if TYPE_CHECKING:
+    import torch
+
     from vllm.config import ModelConfig
     from vllm.multimodal import (MultiModalDataDict, MultiModalPlaceholderDict,
                                  MultiModalRegistry)
@@ -20,6 +24,8 @@
 _C = TypeVar("_C", bound=PretrainedConfig, default=PretrainedConfig)
 _P = TypeVar("_P", bound=ProcessorMixin, default=ProcessorMixin)
 
+logger = init_logger(__name__)
+
 
 @dataclass(frozen=True)
 class InputContext:
@@ -133,7 +139,7 @@ def call_hf_processor(
         hf_processor: ProcessorMixin,
         data: Mapping[str, object],
         kwargs: Mapping[str, object] = {},
-    ) -> BatchFeature:
+    ) -> Union[BatchFeature, JSONTree["torch.Tensor"]]:
         """
         Call `hf_processor` on the prompt `data`
         (text, image, audio...) with configurable options `kwargs`.
@@ -155,7 +161,20 @@ def call_hf_processor(
 
         try:
             output = hf_processor(**data, **merged_kwargs, return_tensors="pt")
-            return output.to(dtype=self.model_config.dtype)
+            if isinstance(output, BatchFeature):
+                return output.to(dtype=self.model_config.dtype)
+
+            def maybe_cast_dtype(x: "torch.Tensor"):
+                # Mimics BatchFeature.to; hint quoted as torch is typing-only
+                dtype = self.model_config.dtype
+                return x.to(dtype=dtype) if x.is_floating_point() else x
+
+            logger.warning_once(
+                f"{type(hf_processor).__name__} did not return `BatchFeature`. "
+                "Make sure to match the behaviour of `ProcessorMixin` when "
+                "implementing custom processors.")
+            output = cast(JSONTree["torch.Tensor"], output)
+            return json_map_leaves(maybe_cast_dtype, output)
         except Exception as exc:
             msg = (f"Failed to apply {type(hf_processor).__name__} "
                    f"on data={data} with kwargs={merged_kwargs}")