Skip to content

Totally rewrite how pipelines load preprocessors #38947

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
364 changes: 153 additions & 211 deletions src/transformers/pipelines/__init__.py

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions src/transformers/pipelines/audio_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ class AudioClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=audio-classification).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = True
_load_tokenizer = False

def __init__(self, *args, **kwargs):
# Only set default top_k if explicitly provided
if "top_k" in kwargs and kwargs["top_k"] is None:
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/automatic_speech_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = True
_load_tokenizer = True
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
24 changes: 9 additions & 15 deletions src/transformers/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,21 +898,15 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
constructor argument. If set to `True`, the output will be stored in the pickle format.
"""

# Historically we have pipelines working with `tokenizer`, `feature_extractor`, and `image_processor`
# as separate processing components. While we have `processor` class that combines them, some pipelines
# might still operate with these components separately.
# With the addition of `processor` to `pipeline`, we want to avoid:
# - loading `processor` for pipelines that still work with `image_processor` and `tokenizer` separately;
# - loading `image_processor`/`tokenizer` as a separate component while we operate only with `processor`,
# because `processor` will load required sub-components by itself.
# Below flags allow granular control over loading components and set to be backward compatible with current
# pipelines logic. You may override these flags when creating your pipeline. For example, for
# `zero-shot-object-detection` pipeline which operates with `processor` you should set `_load_processor=True`
# and all the rest flags to `False` to avoid unnecessary loading of the components.
_load_processor = False
_load_image_processor = True
_load_feature_extractor = True
_load_tokenizer = True
# These flags should be overridden for downstream pipelines. They indicate which preprocessing classes are
# used by each pipeline. The possible values are:
# - True (the class is mandatory, raise an error if it's not present in the repo)
# - None (the class is optional; it should be loaded if present in the repo but the pipeline can work without it)
# - False (the class is never used by the pipeline and should not be loaded even if present)
_load_processor = None
_load_image_processor = None
_load_feature_extractor = None
_load_tokenizer = None

# Pipelines that call `generate` have shared logic, e.g. preparing the generation config.
_pipeline_calls_generate = False
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/depth_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ class DepthEstimationPipeline(Pipeline):
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=depth-estimation).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
requires_backends(self, "vision")
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/document_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = None
_load_feature_extractor = None
_load_tokenizer = False
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ class FeatureExtractionPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

def _sanitize_parameters(self, truncation=None, tokenize_kwargs=None, return_tensors=None, **kwargs):
if tokenize_kwargs is None:
tokenize_kwargs = {}
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/fill_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
Additional dictionary of keyword arguments passed along to the tokenizer.""",
)
class FillMaskPipeline(Pipeline):
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

"""
Masked language modeling prediction pipeline using any `ModelWithLMHead`. See the [masked language modeling
examples](../task_summary#masked-language-modeling) for more information.
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ class ImageClassificationPipeline(Pipeline):
"""

function_to_apply: ClassificationFunction = ClassificationFunction.NONE
_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/image_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ class ImageFeatureExtractionPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def _sanitize_parameters(self, image_processor_kwargs=None, return_tensors=None, pool=None, **kwargs):
preprocess_params = {} if image_processor_kwargs is None else image_processor_kwargs

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/image_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ class ImageSegmentationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=image-segmentation).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = None # Oneformer uses it but no-one else does

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/image_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ class ImageToImagePipeline(Pipeline):
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=image-to-image).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
requires_backends(self, "vision")
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/image_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ class ImageToTextPipeline(Pipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = True
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/mask_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ class MaskGenerationPipeline(ChunkPipeline):
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=mask-generation).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, **kwargs):
super().__init__(**kwargs)
requires_backends(self, "vision")
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/object_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ class ObjectDetectionPipeline(Pipeline):
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=object-detection).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = None

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
supplied arguments.
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

def normalize(self, item):
if isinstance(item, SquadExample):
return item
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/table_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ class TableQuestionAnsweringPipeline(Pipeline):
default_input_names = "table,query"

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/text2text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class Text2TextGenerationPipeline(Pipeline):
```"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True
# Make sure the docstring is updated when the default generation config is changed (in all pipelines in this file)
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/text_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ class TextClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=text-classification).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

return_all_scores = False
function_to_apply = ClassificationFunction.NONE

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ class TextGenerationPipeline(Pipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/text_to_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ class TextToAudioPipeline(Pipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/token_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ class TokenClassificationPipeline(ChunkPipeline):

default_input_names = "sequences"

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

def __init__(self, args_parser=TokenClassificationArgumentHandler(), *args, **kwargs):
super().__init__(*args, **kwargs)
self.check_model_type(
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/video_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ class VideoClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=video-classification).
"""

_load_processor = True
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
requires_backends(self, "av")
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/visual_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class VisualQuestionAnsweringPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=visual-question-answering).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = True

_pipeline_calls_generate = True
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/zero_shot_audio_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ class ZeroShotAudioClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-audio-classification).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = True
_load_tokenizer = True

def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/zero_shot_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ class ZeroShotClassificationPipeline(ChunkPipeline):
of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

def __init__(self, args_parser=ZeroShotClassificationArgumentHandler(), *args, **kwargs):
self._args_parser = args_parser
super().__init__(*args, **kwargs)
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/zero_shot_image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ class ZeroShotImageClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-image-classification).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = True

def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/zero_shot_object_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ class ZeroShotObjectDetectionPipeline(ChunkPipeline):
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-object-detection).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = True

def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down