Skip to content

Totally rewrite how pipelines load preprocessors #38947

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
364 changes: 153 additions & 211 deletions src/transformers/pipelines/__init__.py

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions src/transformers/pipelines/audio_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ class AudioClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=audio-classification).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = True
_load_tokenizer = False

def __init__(self, *args, **kwargs):
# Only set default top_k if explicitly provided
if "top_k" in kwargs and kwargs["top_k"] is None:
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/automatic_speech_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = True
_load_tokenizer = True
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
24 changes: 9 additions & 15 deletions src/transformers/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,21 +898,15 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
constructor argument. If set to `True`, the output will be stored in the pickle format.
"""

# Historically we have pipelines working with `tokenizer`, `feature_extractor`, and `image_processor`
# as separate processing components. While we have `processor` class that combines them, some pipelines
# might still operate with these components separately.
# With the addition of `processor` to `pipeline`, we want to avoid:
# - loading `processor` for pipelines that still work with `image_processor` and `tokenizer` separately;
# - loading `image_processor`/`tokenizer` as a separate component while we operate only with `processor`,
# because `processor` will load required sub-components by itself.
# Below flags allow granular control over loading components and set to be backward compatible with current
# pipelines logic. You may override these flags when creating your pipeline. For example, for
# `zero-shot-object-detection` pipeline which operates with `processor` you should set `_load_processor=True`
# and all the rest flags to `False` to avoid unnecessary loading of the components.
_load_processor = False
_load_image_processor = True
_load_feature_extractor = True
_load_tokenizer = True
# These flags should be overridden for downstream pipelines. They indicate which preprocessing classes are
# used by each pipeline. The possible values are:
# - True (the class is mandatory, raise an error if it's not present in the repo)
# - None (the class is optional; it should be loaded if present in the repo but the pipeline can work without it)
# - False (the class is never used by the pipeline and should not be loaded even if present)
_load_processor = None
_load_image_processor = None
_load_feature_extractor = None
_load_tokenizer = None

# Pipelines that call `generate` have shared logic, e.g. preparing the generation config.
_pipeline_calls_generate = False
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/depth_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ class DepthEstimationPipeline(Pipeline):
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=depth-estimation).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
requires_backends(self, "vision")
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/document_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = None
_load_feature_extractor = None
_load_tokenizer = False
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ class FeatureExtractionPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

def _sanitize_parameters(self, truncation=None, tokenize_kwargs=None, return_tensors=None, **kwargs):
if tokenize_kwargs is None:
tokenize_kwargs = {}
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/fill_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
Additional dictionary of keyword arguments passed along to the tokenizer.""",
)
class FillMaskPipeline(Pipeline):
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

"""
Masked language modeling prediction pipeline using any `ModelWithLMHead`. See the [masked language modeling
examples](../task_summary#masked-language-modeling) for more information.
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ class ImageClassificationPipeline(Pipeline):
"""

function_to_apply: ClassificationFunction = ClassificationFunction.NONE
_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/image_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ class ImageFeatureExtractionPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def _sanitize_parameters(self, image_processor_kwargs=None, return_tensors=None, pool=None, **kwargs):
preprocess_params = {} if image_processor_kwargs is None else image_processor_kwargs

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/image_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ class ImageSegmentationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=image-segmentation).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = None # Oneformer uses it but no-one else does

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/image_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ class ImageToImagePipeline(Pipeline):
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=image-to-image).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
requires_backends(self, "vision")
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/image_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ class ImageToTextPipeline(Pipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = True
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/mask_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ class MaskGenerationPipeline(ChunkPipeline):
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=mask-generation).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, **kwargs):
super().__init__(**kwargs)
requires_backends(self, "vision")
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/object_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ class ObjectDetectionPipeline(Pipeline):
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=object-detection).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = None

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ class QuestionAnsweringArgumentHandler(ArgumentHandler):
supplied arguments.
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

def normalize(self, item):
if isinstance(item, SquadExample):
return item
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/table_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ class TableQuestionAnsweringPipeline(Pipeline):
default_input_names = "table,query"

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/pipelines/text2text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ class Text2TextGenerationPipeline(Pipeline):
```"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True
# Make sure the docstring is updated when the default generation config is changed (in all pipelines in this file)
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/text_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ class TextClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=text-classification).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

return_all_scores = False
function_to_apply = ClassificationFunction.NONE

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ class TextGenerationPipeline(Pipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/text_to_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ class TextToAudioPipeline(Pipeline):
"""

_pipeline_calls_generate = True
_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
max_new_tokens=256,
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/token_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ class TokenClassificationPipeline(ChunkPipeline):

default_input_names = "sequences"

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

def __init__(self, args_parser=TokenClassificationArgumentHandler(), *args, **kwargs):
super().__init__(*args, **kwargs)
self.check_model_type(
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/video_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ class VideoClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=video-classification).
"""

_load_processor = True
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = False

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
requires_backends(self, "av")
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/visual_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class VisualQuestionAnsweringPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=visual-question-answering).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = True

_pipeline_calls_generate = True
# Make sure the docstring is updated when the default generation config is changed
_default_generation_config = GenerationConfig(
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/zero_shot_audio_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ class ZeroShotAudioClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-audio-classification).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = True
_load_tokenizer = True

def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/zero_shot_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ class ZeroShotClassificationPipeline(ChunkPipeline):
of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
"""

_load_processor = False
_load_image_processor = False
_load_feature_extractor = False
_load_tokenizer = True

def __init__(self, args_parser=ZeroShotClassificationArgumentHandler(), *args, **kwargs):
self._args_parser = args_parser
super().__init__(*args, **kwargs)
Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/zero_shot_image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ class ZeroShotImageClassificationPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-image-classification).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = True

def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/transformers/pipelines/zero_shot_object_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ class ZeroShotObjectDetectionPipeline(ChunkPipeline):
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-object-detection).
"""

_load_processor = False
_load_image_processor = True
_load_feature_extractor = False
_load_tokenizer = True

def __init__(self, **kwargs):
super().__init__(**kwargs)

Expand Down