diff --git a/docs/source/openvino/models.mdx b/docs/source/openvino/models.mdx
index fe6fbeea88..e0659b2f73 100644
--- a/docs/source/openvino/models.mdx
+++ b/docs/source/openvino/models.mdx
@@ -25,6 +25,7 @@ Here is the list of the supported architectures :
 - Beit
 - Bert
 - BioGPT
+- BigBirdPegasus
 - BlenderBot
 - BlenderBotSmall
 - Bloom
@@ -64,6 +65,7 @@ Here is the list of the supported architectures :
 - GPT-Neo
 - GPT-NeoX
 - GPT-NeoX-Japanese
+- GPT-OSS
 - Gemma
 - Gemma2
 - Gemma3
@@ -103,6 +105,7 @@ Here is the list of the supported architectures :
 - MobileVit
 - Nystromformer
 - OLMo
+- OLMo 2
 - OPT
 - Orion
 - Pegasus
diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 4f5b0a55a4..e2a95f2ac3 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -2994,9 +2994,6 @@ class Phi4MMConfigBehavior(str, enum.Enum):
 @register_in_tasks_manager(
     "phi4mm", *["image-text-to-text", "automatic-speech-recognition"], library_name="transformers"
 )
-@register_in_tasks_manager(
-    "phi4_multimodal", *["image-text-to-text", "automatic-speech-recognition"], library_name="transformers"
-)
 class Phi4MMOpenVINOConfig(BaseVLMOpenVINOConfig):
     SUPPORTED_BEHAVIORS = [model_type.value for model_type in Phi4MMConfigBehavior]
     NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
@@ -3220,6 +3217,14 @@ def rename_ambiguous_inputs(self, inputs):
         return inputs
 
 
+@register_in_tasks_manager(
+    "phi4_multimodal", *["image-text-to-text", "automatic-speech-recognition"], library_name="transformers"
+)
+class Phi4MultimodalOpenVINOConfig(Phi4MMOpenVINOConfig):
+    MIN_TRANSFORMERS_VERSION = "4.51.0"
+    MAX_TRANSFORMERS_VERSION = "4.60.0"
+
+
 class DummyQwen2VLLMInputGenerator(DummyTextInputGenerator):
     def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
         generated_input = super().generate(input_name, framework, int_dtype, float_dtype)
@@ -3671,6 +3676,7 @@ class M2M100OpenVINOConfig(BartOpenVINOConfig):
 )
 @register_in_tasks_manager("deepseek", *["text-generation", "text-generation-with-past"], library_name="transformers")
 class DeepseekOpenVINOConfig(MiniCPM3OpenVINOConfig):
+    MIN_TRANSFORMERS_VERSION = "4.46.0"
     MAX_TRANSFORMERS_VERSION = "4.53.3"
 
     _MODEL_PATCHER = DeepseekPatcher
diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py
index be2c289628..c91b224ddd 100644
--- a/tests/openvino/test_decoder.py
+++ b/tests/openvino/test_decoder.py
@@ -8,6 +8,7 @@
 import torch
 from parameterized import parameterized
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, PretrainedConfig, pipeline, set_seed
+from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
 from transformers.testing_utils import slow
 from utils_tests import (
     F32_CONFIG,
@@ -19,7 +20,10 @@
     patch_awq_for_inference,
 )
 
+from optimum.exporters.openvino.model_configs import DeepseekOpenVINOConfig
+from optimum.exporters.onnx.model_configs import NemotronOnnxConfig
 from optimum.exporters.openvino.model_patcher import patch_update_causal_mask
+from optimum.exporters.tasks import TasksManager
 from optimum.intel import OVModelForCausalLM, OVModelForSequenceClassification
 from optimum.intel.openvino.utils import _print_compiled_model_properties
 from optimum.intel.pipelines import pipeline as optimum_pipeline
@@ -36,6 +40,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "baichuan2",
         "baichuan2-13b",
         "gpt_bigcode",
+        "bigbird_pegasus",
         "blenderbot",
         "blenderbot-small",
         "bloom",
@@ -52,6 +57,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "mistral",
         "mixtral",
         "mpt",
+        "mbart",
         "opt",
         "pegasus",
         "qwen",
@@ -84,10 +90,10 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "gemma2",
         "exaone",
         "granite",
-        "granite-moe",
+        "granitemoe",
     )
 
-    SUPPORTED_SSM_ARCHITECTURES = ("mamba", "falcon-mamba")
+    SUPPORTED_SSM_ARCHITECTURES = ("mamba", "falcon_mamba")
 
     if is_transformers_version(">=", "4.49"):
         SUPPORTED_SSM_ARCHITECTURES += ("zamba2",)
@@ -95,7 +101,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         SUPPORTED_ARCHITECTURES += SUPPORTED_SSM_ARCHITECTURES
 
     if is_transformers_version(">=", "4.46.0"):
-        SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3", "phi3-moe")
+        SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3", "phimoe")
     # openvino 2025.0 required for disabling check_trace
     if is_openvino_version(">=", "2025.0"):
         SUPPORTED_ARCHITECTURES += ("deepseek",)
@@ -108,17 +114,23 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
     if is_openvino_version(">=", "2024.6.0") and platform.system() != "Windows":
         SUPPORTED_ARCHITECTURES += ("mixtral_awq",)
 
+    if is_transformers_version(">", "4.47"):
+        SUPPORTED_ARCHITECTURES += ("olmo2",)
+
+    if is_transformers_version(">", "4.48"):
+        SUPPORTED_ARCHITECTURES += ("nemotron",)
+
     if is_transformers_version(">", "4.49"):
-        SUPPORTED_ARCHITECTURES += ("gemma3_text",)
+        SUPPORTED_ARCHITECTURES += ("gemma3_text", "helium")
 
     if is_transformers_version(">=", "4.51.0"):
-        SUPPORTED_ARCHITECTURES += ("qwen3", "qwen3_moe")
+        SUPPORTED_ARCHITECTURES += ("llama4", "llama4_text", "qwen3", "qwen3_moe")
 
     if is_transformers_version(">=", "4.51.3"):
         SUPPORTED_ARCHITECTURES += ("glm4",)
 
     if is_transformers_version(">=", "4.53.0"):
-        SUPPORTED_ARCHITECTURES += ("arcee",)
+        SUPPORTED_ARCHITECTURES += ("arcee", "smollm3")
 
     if is_transformers_version(">=", "4.54.0"):
         # remote code models differs after transformers v4.54
@@ -154,6 +166,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "bart": 2,
         "baichuan2": 2,
         "baichuan2-13b": 2,
+        "bigbird_pegasus": 2 if is_transformers_version(">=", "4.52") else 0,
         "gpt_bigcode": 5,
         "blenderbot": 2,
         "blenderbot-small": 2,
@@ -166,12 +179,17 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "gpt_neo": 4,
         "gpt_neox": 5,
         "llama": 2,
+        "llama4": 5,
+        "llama4_text": 2,
         "marian": 2,
+        "mbart": 2,
         "minicpm": 4,
         "mistral": 2,
         "mixtral": 2,
         "mpt": 5,
-        "opt": 5 if is_transformers_version(">=", "4.46.0") else 0,
+        "nemotron": 2,
+        "olmo2": 2,
+        "opt": 5 if is_transformers_version(">=", "4.46") else 0,
         "pegasus": 2,
         "qwen": 2,
         "phi": 2,
@@ -191,6 +209,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "olmo": 2,
         "stablelm": 2,
         "starcoder2": 2,
+        "smollm3": 2,
         "dbrx": 2,
         "cohere": 2,
         "qwen2": 2,
@@ -200,25 +219,54 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "gemma2": 4,
         "exaone": 8,
         "granite": 6,
-        "granite-moe": 6,
+        "granitemoe": 6,
         "glm": 28,
         "mistral-nemo": 8,
         "minicpm3": 6,
-        "phi3-moe": 2,
+        "phimoe": 2,
         "deepseek": 2,
         "opt_gptq": 12,
         "mixtral_awq": 2,
         "gemma3_text": 2,
         "glm4": 2,
+        "helium": 2,
         "qwen3": 2,
         "qwen3_moe": 2,
         "mamba": 0,
-        "falcon-mamba": 0,
+        "falcon_mamba": 0,
         "arcee": 2,
         "gpt_oss": 2 if is_openvino_version(">=", "2025.4") else 0,
         "gpt_oss_mxfp4": 2 if is_openvino_version(">=", "2025.4") else 0,
         "zamba2": 1,
     }
+    TASK = "text-generation"
+
+    def test_find_untested_architectures(self):
+        if len(self.SUPPORTED_ARCHITECTURES) != len(set(self.SUPPORTED_ARCHITECTURES)):
+            raise ValueError(
+                f"For the task `{self.TASK}`, some architectures are duplicated in the list of tested architectures: "
+                f"{self.SUPPORTED_ARCHITECTURES}.\n"
+            )
+
+        tested_architectures = set(self.SUPPORTED_ARCHITECTURES)
+        transformers_architectures = set(CONFIG_MAPPING_NAMES.keys())
+        ov_architectures = set(TasksManager.get_supported_model_type_for_task(task=self.TASK, exporter="openvino"))
+        supported_architectures = ov_architectures & transformers_architectures
+
+        if is_transformers_version(">=", str(DeepseekOpenVINOConfig.MAX_TRANSFORMERS_VERSION)):
+            if "deepseek_v2" in supported_architectures:
+                supported_architectures.remove("deepseek_v2")
+            if "deepseek_v3" in supported_architectures:
+                supported_architectures.remove("deepseek_v3")
+        elif is_transformers_version("<", str(NemotronOnnxConfig.MIN_TRANSFORMERS_VERSION)):
+            supported_architectures.remove("nemotron")
+
+        untested_architectures = supported_architectures - tested_architectures
+
+        if len(untested_architectures) > 0:
+            raise ValueError(
+                f"For the task `{self.TASK}`, the OpenVINO exporter supports {untested_architectures} which are not tested"
+            )
 
     # TODO: remove gptq/awq from here
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py
index 98599ac31b..0f97e7d2aa 100644
--- a/tests/openvino/test_export.py
+++ b/tests/openvino/test_export.py
@@ -81,7 +81,7 @@ class ExportModelTest(unittest.TestCase):
         "speecht5": OVModelForTextToSpeechSeq2Seq,
         "clip": OVModelForZeroShotImageClassification,
         "mamba": OVModelForCausalLM,
-        "falcon-mamba": OVModelForCausalLM,
+        "falcon_mamba": OVModelForCausalLM,
         "stable-diffusion-3": OVStableDiffusion3Pipeline,
         "flux": OVFluxPipeline,
         "ltx-video": OVLTXPipeline,
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index bba9c3b92a..e657e246ec 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -84,7 +84,7 @@ class OVCLIExportTestCase(unittest.TestCase):
         ("text2text-generation", "t5"),
         ("text2text-generation-with-past", "t5"),
         ("text-generation-with-past", "mamba"),
-        ("text-generation-with-past", "falcon-mamba"),
+        ("text-generation-with-past", "falcon_mamba"),
         ("text-classification", "albert"),
         ("question-answering", "distilbert"),
         ("token-classification", "roberta"),
@@ -133,7 +133,7 @@ class OVCLIExportTestCase(unittest.TestCase):
         "speecht5": 2,
         "clip": 2 if is_tokenizers_version("<", "0.20.0") or is_openvino_version(">=", "2024.5") else 0,
         "mamba": 2,
-        "falcon-mamba": 2,
+        "falcon_mamba": 2,
         "qwen3": 2,
         "zamba2": 2,
     }
diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py
index beb17b1389..197773b6d3 100644
--- a/tests/openvino/test_seq2seq.py
+++ b/tests/openvino/test_seq2seq.py
@@ -18,6 +18,7 @@
 from tempfile import TemporaryDirectory
 
 import numpy as np
+import openvino
 import pytest
 import requests
 import torch
@@ -40,6 +41,7 @@
     pipeline,
     set_seed,
 )
+from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES
 from transformers.onnx.utils import get_preprocessor
 from transformers.testing_utils import slow
 from transformers.utils import http_user_agent
@@ -47,6 +49,7 @@
 
 from optimum.exporters.openvino.model_patcher import patch_update_causal_mask
 from optimum.exporters.openvino.stateful import model_has_state
+from optimum.exporters.tasks import TasksManager
 from optimum.intel import (
     OVModelForPix2Struct,
     OVModelForSeq2SeqLM,
@@ -56,60 +59,114 @@
     OVModelForVisualCausalLM,
 )
 from optimum.intel.openvino.modeling_seq2seq import OVDecoder, OVEncoder
+from optimum.intel.openvino.modeling_text2speech import (
+    OVTextToSpeechDecoder,
+    OVTextToSpeechEncoder,
+    OVTextToSpeechPostNet,
+    OVTextToSpeechVocoder,
+)
 from optimum.intel.openvino.modeling_visual_language import MODEL_PARTS_CLS_MAPPING, MODEL_TYPE_TO_CLS_MAPPING
 from optimum.intel.pipelines import pipeline as optimum_pipeline
 from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
 
 
-class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase):
+MODEL_NOT_TESTED = set()
+
+if is_openvino_version(">=", "2025.3.0") and is_openvino_version("<", "2025.5.0"):
+    MODEL_NOT_TESTED = {"marian"}
+
+
+class OVSeq2SeqTestMixin(unittest.TestCase):
+    SUPPORTED_ARCHITECTURES = None
+
+    def _check_openvino_model_attributes(self, openvino_model, use_cache: bool = True, stateful: bool = True):
+        self.assertIsInstance(openvino_model, self.OVMODEL_CLASS)
+        self.assertIsInstance(openvino_model.config, PretrainedConfig)
+        self.assertIsInstance(openvino_model.generation_config, GenerationConfig)
+
+        self.assertIsInstance(openvino_model.encoder, OVEncoder)
+        self.assertIsInstance(openvino_model.decoder, OVDecoder)
+        self.assertIsInstance(openvino_model.encoder.model, openvino.Model)
+        self.assertIsInstance(openvino_model.decoder.model, openvino.Model)
+
+        if not stateful and use_cache:
+            self.assertIsInstance(openvino_model.decoder_with_past, OVDecoder)
+            self.assertIsInstance(openvino_model.decoder_with_past.model, openvino.Model)
+        else:
+            self.assertIsNone(openvino_model.decoder_with_past)
+
+        self.assertEqual(openvino_model.use_cache, use_cache)
+        self.assertEqual(openvino_model.decoder.stateful, stateful)
+        self.assertEqual(model_has_state(openvino_model.decoder.model), stateful)
+
+    def _test_find_untested_architectures(self):
+        if len(self.SUPPORTED_ARCHITECTURES) != len(set(self.SUPPORTED_ARCHITECTURES)):
+            raise ValueError(
+                f"For the task `{self.TASK}`, some architectures are duplicated in the list of tested architectures: "
+                f"{self.SUPPORTED_ARCHITECTURES}.\n"
+            )
+
+        tested_architectures = set(self.SUPPORTED_ARCHITECTURES)
+        transformers_architectures = set(CONFIG_MAPPING_NAMES.keys())
+        ov_architectures = set(TasksManager.get_supported_model_type_for_task(task=self.TASK, exporter="openvino"))
+        supported_architectures = ov_architectures & transformers_architectures
+
+        untested_architectures = supported_architectures - tested_architectures
+
+        if len(untested_architectures - MODEL_NOT_TESTED) > 0:
+            raise ValueError(
+                f"For the task `{self.TASK}`, the OpenVINO exporter supports {untested_architectures} which are not tested"
+            )
+
+
+class OVModelForSeq2SeqLMIntegrationTest(OVSeq2SeqTestMixin):
     SUPPORTED_ARCHITECTURES = (
         "bart",
-        # "bigbird_pegasus",
+        "bigbird_pegasus",
         "blenderbot",
         "blenderbot-small",
-        # "longt5",
+        "encoder-decoder",
+        "longt5",
         "m2m_100",
         "mbart",
         "mt5",
         "pegasus",
         "t5",
     )
+    OVMODEL_CLASS = OVModelForSeq2SeqLM
+    AUTOMODEL_CLASS = AutoModelForSeq2SeqLM
+    TASK = "text2text-generation"
+    GENERATION_LENGTH = 100
+    SPEEDUP_CACHE = 1.1
 
     if not (is_openvino_version(">=", "2025.3.0") and is_openvino_version("<", "2025.5.0")):
         # There are known issues with marian model on OpenVINO 2025.3.x and 2025.4.x
         SUPPORTED_ARCHITECTURES += ("marian",)
 
-    GENERATION_LENGTH = 100
-    SPEEDUP_CACHE = 1.1
-
-    SUPPORT_STATEFUL = ("t5", "mt5")
+    SUPPORT_STATEFUL = ("t5", "mt5", "longt5")
     if is_transformers_version(">=", "4.52.0"):
         SUPPORT_STATEFUL += ("bart", "blenderbot", "blenderbot-small", "m2m_100", "marian", "mbart")
     if is_transformers_version(">=", "4.53.0"):
         SUPPORT_STATEFUL += ("pegasus",)
 
+    def test_find_untested_architectures(self):
+        self._test_find_untested_architectures()
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         set_seed(SEED)
-        ov_model = OVModelForSeq2SeqLM.from_pretrained(
+        ov_model = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, ov_config=F32_CONFIG, device=OPENVINO_DEVICE
         )
-        ov_stateless_model = OVModelForSeq2SeqLM.from_pretrained(
+        ov_stateless_model = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, use_cache=False, stateful=False, ov_config=F32_CONFIG, device=OPENVINO_DEVICE
         )
 
         expected_stateful = is_transformers_version(">", "4.46") and model_arch in self.SUPPORT_STATEFUL
-        self.assertEqual(ov_model.decoder.stateful, expected_stateful)
-        self.assertEqual(model_has_state(ov_model.decoder.model), expected_stateful)
-        check_with_past_available = self.assertIsNone if expected_stateful else self.assertIsNotNone
-        check_with_past_available(ov_model.decoder_with_past)
-        self.assertIsInstance(ov_model.encoder, OVEncoder)
-        self.assertIsInstance(ov_model.decoder, OVDecoder)
-        if not ov_model.decoder.stateful:
-            self.assertIsInstance(ov_model.decoder_with_past, OVDecoder)
-        self.assertIsInstance(ov_model.config, PretrainedConfig)
-
-        transformers_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+        self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=expected_stateful)
+        self._check_openvino_model_attributes(ov_stateless_model, use_cache=False, stateful=False)
+
+        transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokens = tokenizer("This is a sample input", return_tensors="pt")
         decoder_start_token_id = transformers_model.config.decoder_start_token_id if model_arch != "mbart" else 2
@@ -156,7 +213,7 @@ def test_pipeline(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         inputs = "This is a test"
-        model = OVModelForSeq2SeqLM.from_pretrained(model_id, compile=False, device=OPENVINO_DEVICE)
+        model = self.OVMODEL_CLASS.from_pretrained(model_id, compile=False, device=OPENVINO_DEVICE)
         model.eval()
         model.half()
         model.to("cpu")
@@ -193,7 +250,7 @@ def test_pipeline(self, model_arch):
     @slow
     def test_generate_utils(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
-        model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
+        model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         text = "This is a sample input"
         tokens = tokenizer(text, return_tensors="pt")
@@ -216,8 +273,7 @@ def test_compare_with_and_without_past_key_values(self):
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         text = "This is a sample input"
         tokens = tokenizer(text, return_tensors="pt")
-
-        model_with_pkv = OVModelForSeq2SeqLM.from_pretrained(
+        model_with_pkv = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, use_cache=True, device=OPENVINO_DEVICE
         )
         _ = model_with_pkv.generate(**tokens)  # warmup
@@ -225,8 +281,7 @@ def test_compare_with_and_without_past_key_values(self):
         outputs_model_with_pkv = model_with_pkv.generate(
             **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
         )
-
-        model_without_pkv = OVModelForSeq2SeqLM.from_pretrained(
+        model_without_pkv = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, use_cache=False, device=OPENVINO_DEVICE
         )
         _ = model_without_pkv.generate(**tokens)  # warmup
@@ -248,8 +303,11 @@ def test_compare_with_and_without_past_key_values(self):
         gc.collect()
 
 
-class OVModelForSpeechSeq2SeqIntegrationTest(unittest.TestCase):
+class OVModelForSpeechSeq2SeqIntegrationTest(OVSeq2SeqTestMixin):
     SUPPORTED_ARCHITECTURES = ("whisper",)
+    OVMODEL_CLASS = OVModelForSpeechSeq2Seq
+    AUTOMODEL_CLASS = AutoModelForSpeechSeq2Seq
+    TASK = "automatic-speech-recognition"
 
     def _generate_random_audio_data(self):
         np.random.seed(10)
@@ -262,20 +320,15 @@ def _generate_random_audio_data(self):
     def test_compare_to_transformers(self, model_arch):
         set_seed(SEED)
         model_id = MODEL_NAMES[model_arch]
-        transformers_model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
-        ov_model = OVModelForSpeechSeq2Seq.from_pretrained(
+        transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id)
+        ov_model = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, ov_config=F32_CONFIG, device=OPENVINO_DEVICE
         )
-        ov_model_stateless = OVModelForSpeechSeq2Seq.from_pretrained(
+        ov_model_stateless = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, ov_config=F32_CONFIG, stateful=False, device=OPENVINO_DEVICE
         )
-        self.assertIsInstance(ov_model.config, PretrainedConfig)
-        # whisper cache class support implemented in 4.43
-        expected_stateful = True
-        self.assertEqual(ov_model.decoder.stateful, expected_stateful)
-        self.assertEqual(model_has_state(ov_model.decoder.model), expected_stateful)
-        check_with_past_available = self.assertIsNone if expected_stateful else self.assertIsNotNone
-        check_with_past_available(ov_model.decoder_with_past)
+        self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=True)
+        self._check_openvino_model_attributes(ov_model_stateless, use_cache=True, stateful=False)
 
         processor = get_preprocessor(model_id)
         data = self._generate_random_audio_data()
@@ -335,7 +388,7 @@ def test_compare_to_transformers(self, model_arch):
     def test_pipeline(self, model_arch):
         set_seed(SEED)
         model_id = MODEL_NAMES[model_arch]
-        model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, device=OPENVINO_DEVICE)
+        model = self.OVMODEL_CLASS.from_pretrained(model_id, device=OPENVINO_DEVICE)
         processor = get_preprocessor(model_id)
         pipe = pipeline(
             "automatic-speech-recognition",
@@ -356,9 +409,18 @@ def test_pipeline(self, model_arch):
         gc.collect()
 
 
-class OVModelForVision2SeqIntegrationTest(unittest.TestCase):
-    SUPPORTED_ARCHITECTURES = ["vision-encoder-decoder", "trocr", "donut"]
+class OVModelForVision2SeqIntegrationTest(OVSeq2SeqTestMixin):
+    SUPPORTED_ARCHITECTURES = [
+        "donut",
+        "got_ocr2",
+        "pix2struct",
+        "trocr",
+        "vision-encoder-decoder",
+    ]
+    TASK = "image-to-text"
+    OVMODEL_CLASS = OVModelForVision2Seq
+    AUTOMODEL_CLASS = AutoModelForVision2Seq
 
     GENERATION_LENGTH = 100
     SPEEDUP_CACHE = 1.1
@@ -373,9 +435,12 @@ def _get_preprocessors(self, model_id):
 
         return image_processor, tokenizer
 
+    def test_find_untested_architectures(self):
+        self._test_find_untested_architectures()
+
     def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
-            _ = OVModelForVision2Seq.from_pretrained(MODEL_NAMES["bert"], export=True, device=OPENVINO_DEVICE)
+            _ = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True, device=OPENVINO_DEVICE)
 
         self.assertIn("only supports the tasks", str(context.exception))
 
@@ -384,7 +449,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
     @slow
     def test_generate_utils(self, model_arch: str):
         model_id = MODEL_NAMES[model_arch]
-        model = OVModelForVision2Seq.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
+        model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
         feature_extractor, tokenizer = self._get_preprocessors(model_id)
 
         data = self._get_sample_image()
@@ -399,17 +464,11 @@ def test_generate_utils(self, model_arch: str):
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch: str):
         model_id = MODEL_NAMES[model_arch]
-        ov_model = OVModelForVision2Seq.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
-
-        self.assertIsInstance(ov_model.encoder, OVEncoder)
-
-        self.assertIsInstance(ov_model.decoder, OVDecoder)
-        self.assertIsInstance(ov_model.decoder_with_past, OVDecoder)
-
-        self.assertIsInstance(ov_model.config, PretrainedConfig)
+        ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
+        self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=False)
 
         set_seed(SEED)
-        transformers_model = AutoModelForVision2Seq.from_pretrained(model_id)
+        transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id)
         feature_extractor, tokenizer = self._get_preprocessors(model_id)
 
         data = self._get_sample_image()
@@ -442,7 +501,7 @@ def test_compare_to_transformers(self, model_arch: str):
     def test_pipeline(self, model_arch: str):
         set_seed(SEED)
         model_id = MODEL_NAMES[model_arch]
-        ov_model = OVModelForVision2Seq.from_pretrained(model_id, compile=False, device=OPENVINO_DEVICE)
+        ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, compile=False, device=OPENVINO_DEVICE)
         feature_extractor, tokenizer = self._get_preprocessors(model_id)
         ov_model.reshape(1, -1)
         ov_model.compile()
@@ -465,7 +524,7 @@ def test_pipeline(self, model_arch: str):
         gc.collect()
 
 
-class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
+class OVModelForVisualCausalLMIntegrationTest(OVSeq2SeqTestMixin):
     SUPPORTED_ARCHITECTURES = [
         "internvl_chat",
         "llava",
@@ -479,6 +538,8 @@ class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
     ]
     SUPPORT_VIDEO = ["llava_next_video", "qwen2_vl"]
     SUPPORT_AUDIO = []
+    OVMODEL_CLASS = OVModelForVisualCausalLM
+    TASK = "image-text-to-text"
 
     if is_transformers_version(">=", "4.46.0"):
         SUPPORTED_ARCHITECTURES += ["maira2", "idefics3"]
@@ -490,7 +551,7 @@ class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
     if is_transformers_version(">", "4.49"):
         SUPPORTED_ARCHITECTURES += ["gemma3", "smolvlm"]
     if is_transformers_version(">=", "4.51"):
-        SUPPORTED_ARCHITECTURES += ["llama4"]
+        SUPPORTED_ARCHITECTURES += ["llama4", "phi4_multimodal"]
     if is_transformers_version("<", "4.52"):
         SUPPORTED_ARCHITECTURES += ["minicpmo"]
 
@@ -498,9 +559,7 @@ class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
         # remote code models differs after transformers v4.54
         SUPPORTED_ARCHITECTURES = set(SUPPORTED_ARCHITECTURES) - {"llava-qwen2", "phi3_v", "phi4mm"}
 
-    TASK = "image-text-to-text"
     REMOTE_CODE_MODELS = ["internvl_chat", "minicpmv", "minicpmo", "llava-qwen2", "phi3_v", "maira2", "phi4mm"]
-
     IMAGE = Image.open(
         requests.get(
             TEST_IMAGE_URL,
@@ -551,6 +610,23 @@ def _check_device_and_request(self, ov_model, expected_device, has_request):
             self.assertEqual(component._device, expected_device)
             request_check_fn(component.request is None)
 
+    def _check_openvino_model_attributes(self, openvino_model, use_cache: bool = True, stateful: bool = True):
+        self.assertIsInstance(openvino_model, self.OVMODEL_CLASS)
+        self.assertIsInstance(openvino_model.config, PretrainedConfig)
+        self.assertIsInstance(openvino_model.generation_config, GenerationConfig)
+        self.assertIsInstance(openvino_model, MODEL_TYPE_TO_CLS_MAPPING[openvino_model.config.model_type])
+
+        for component_name, component in openvino_model.components.items():
+            self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name])
+            self.assertIsInstance(component.model, openvino.Model)
+
+        self.assertEqual(openvino_model.use_cache, use_cache)
+        self.assertEqual(openvino_model.language_model.stateful, stateful)
+        self.assertEqual(model_has_state(openvino_model.language_model.model), stateful)
+
+    def test_find_untested_architectures(self):
+        self._test_find_untested_architectures()
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
         prompt = "What is shown in this image?"
@@ -558,33 +634,29 @@ def test_compare_to_transformers(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         set_seed(SEED)
         loading_kwargs = {}
+        trust_remote_code = model_arch in self.REMOTE_CODE_MODELS
         if "llama4" in model_arch:
             loading_kwargs = {"_attn_implementation": "sdpa"}
 
         transformers_model = self.get_transformer_model_class(model_arch).from_pretrained(
-            model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS, **loading_kwargs
+            model_id, trust_remote_code=trust_remote_code, **loading_kwargs
        )
         transformers_model.eval()
         if "internvl_chat" in model_arch:
-            tokenizer = AutoTokenizer.from_pretrained(
-                model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
-            )
+            tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code)
             img_context_token_id = tokenizer.convert_tokens_to_ids("<IMG_CONTEXT>")
             transformers_model.img_context_token_id = img_context_token_id
         if "llava-qwen2" in model_arch:
             transformers_model.get_vision_tower().load_model()
         preprocessors = self.get_preprocessors(model_arch)
         set_seed(SEED)
-        ov_model = OVModelForVisualCausalLM.from_pretrained(
+        ov_model = self.OVMODEL_CLASS.from_pretrained(
             model_id,
             export=True,
-            trust_remote_code=model_arch in self.REMOTE_CODE_MODELS,
+            trust_remote_code=trust_remote_code,
             compile=False,
             device=OPENVINO_DEVICE,
         )
-        self.assertIsInstance(ov_model, MODEL_TYPE_TO_CLS_MAPPING[ov_model.config.model_type])
-        for component_name, component in ov_model.components.items():
-            self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name])
-        self.assertIsInstance(ov_model.config, PretrainedConfig)
+        self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=True)
 
         inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600)))
         if model_arch == "gemma3":
@@ -743,7 +815,8 @@ def test_compare_to_transformers(self, model_arch):
     def test_llava_with_new_preprocessing(self, model_arch):
         prompt = "<image>\n What is shown in this image?"
         model_id = MODEL_NAMES[model_arch]
-        config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
+        trust_remote_code = model_arch in self.REMOTE_CODE_MODELS
+        config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code)
         processor = AutoProcessor.from_pretrained(
             model_id,
             patch_size=config.vision_config.patch_size,
@@ -752,8 +825,8 @@ def test_llava_with_new_preprocessing(self, model_arch):
             num_additional_image_tokens=1,
         )
         transformers_model = self.get_transformer_model_class(model_arch).from_pretrained(model_id)
-        ov_model = OVModelForVisualCausalLM.from_pretrained(
-            model_id, export=True, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS, device=OPENVINO_DEVICE
+        ov_model = self.OVMODEL_CLASS.from_pretrained(
+            model_id, export=True, trust_remote_code=trust_remote_code, device=OPENVINO_DEVICE
         )
         self.assertTrue(ov_model._support_new_processing)
         self.assertTrue(processor.patch_size is not None)
@@ -797,11 +870,11 @@ def test_llava_with_new_preprocessing(self, model_arch):
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_generate_utils(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
-        model = OVModelForVisualCausalLM.from_pretrained(
-            model_id, export=True, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS, device=OPENVINO_DEVICE
+        trust_remote_code = model_arch in self.REMOTE_CODE_MODELS
+        model = self.OVMODEL_CLASS.from_pretrained(
+            model_id, export=True, trust_remote_code=trust_remote_code, device=OPENVINO_DEVICE
         )
-
-        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
+        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code)
         question = "Describe image"
         preprocessors = self.get_preprocessors(model_arch)
         inputs = model.preprocess_inputs(**preprocessors, text=question, image=self.IMAGE.resize((600, 600)))
@@ -892,14 +965,14 @@ def get_preprocessors(self, model_arch):
     def test_model_can_be_loaded_after_saving(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         with TemporaryDirectory() as save_dir:
-            ov_model = OVModelForVisualCausalLM.from_pretrained(
+            ov_model = self.OVMODEL_CLASS.from_pretrained(
                 model_id,
                 compile=False,
                 trust_remote_code=model_arch in self.REMOTE_CODE_MODELS,
                 device=OPENVINO_DEVICE,
             )
             ov_model.save_pretrained(save_dir)
-            ov_restored_model = OVModelForVisualCausalLM.from_pretrained(
+            ov_restored_model = self.OVMODEL_CLASS.from_pretrained(
                 save_dir,
                 compile=False,
                 trust_remote_code=model_arch in self.REMOTE_CODE_MODELS,
@@ -908,8 +981,10 @@ def test_model_can_be_loaded_after_saving(self, model_arch):
         self.assertIsInstance(ov_restored_model, type(ov_model))
 
 
-class OVModelForTextToSpeechSeq2SeqIntegrationTest(unittest.TestCase):
+class OVModelForTextToSpeechSeq2SeqIntegrationTest(OVSeq2SeqTestMixin):
     SUPPORTED_ARCHITECTURES = ("speecht5",)
+    OVMODEL_CLASS = OVModelForTextToSpeechSeq2Seq
+    AUTOMODEL_CLASS = AutoModelForTextToSpectrogram
 
     def _generate_text(self):
         return "This text is converted to speech using OpenVINO backend"
@@ -925,12 +1000,6 @@ def _get_processor(self, model_id, model_arch):
         else:
             raise Exception("{} unknown processor for text-to-speech".format(model_arch))
 
-    def _get_model(self, model_id, model_arch):
-        if model_arch == "speecht5":
-            return AutoModelForTextToSpectrogram.from_pretrained(model_id)
-        else:
-            raise Exception("{} unknown model for text-to-speech".format(model_arch))
-
     def _get_vocoder(self, vocoder_id, model_arch):
         if model_arch == "speecht5":
             from transformers import SpeechT5HifiGan
@@ -940,6 +1009,23 @@ def _get_vocoder(self, vocoder_id, model_arch):
         else:
             raise Exception("{} unknown model for text-to-speech".format(model_arch))
 
+    def _check_openvino_model_attributes(self, openvino_model, use_cache: bool = True):
+        self.assertIsInstance(openvino_model, self.OVMODEL_CLASS)
+        self.assertIsInstance(openvino_model.config, PretrainedConfig)
+        self.assertIsInstance(openvino_model.generation_config, GenerationConfig)
+
+        self.assertIsInstance(openvino_model.encoder, OVTextToSpeechEncoder)
+        self.assertIsInstance(openvino_model.decoder, OVTextToSpeechDecoder)
+        self.assertIsInstance(openvino_model.postnet, OVTextToSpeechPostNet)
+        self.assertIsInstance(openvino_model.vocoder, OVTextToSpeechVocoder)
+        self.assertIsInstance(openvino_model.encoder.model, openvino.Model)
+        self.assertIsInstance(openvino_model.decoder.model, openvino.Model)
+        self.assertIsInstance(openvino_model.postnet.model, openvino.Model)
+        self.assertIsInstance(openvino_model.vocoder.model, openvino.Model)
+
+        self.assertEqual(openvino_model.use_cache, use_cache)
+        self.assertEqual(model_has_state(openvino_model.decoder.model), use_cache)
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
         set_seed(SEED)
@@ -947,24 +1033,18 @@ def test_compare_to_transformers(self, model_arch):
         speaker_embeddings = self._generate_speaker_embedding()
         model_id = MODEL_NAMES[model_arch]
 
-        if model_arch == "speecht5":
-            # since Auto class for text-to-audio is not implemented in optimum
-            # generate model classes for reference generation
-            vocoder_id = "fxmarty/speecht5-hifigan-tiny"
-            processor = self._get_processor(model_id, model_arch)
-            model = self._get_model(model_id, model_arch)
-            vocoder = self._get_vocoder(vocoder_id, model_arch)
-            inputs = processor(text=text_data, return_tensors="pt")
-            ref_speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
-            ref_speech = ref_speech.unsqueeze(0) if ref_speech.dim() == 1 else ref_speech
-        else:
-            raise Exception("{} unknown model for text-to-speech".format(model_arch))
-
-        ov_pipe = OVModelForTextToSpeechSeq2Seq.from_pretrained(model_id, vocoder=vocoder_id, device=OPENVINO_DEVICE)
-        ov_speech = ov_pipe.generate(input_ids=inputs["input_ids"], speaker_embeddings=speaker_embeddings)
-
-        self.assertIsInstance(ov_pipe.config, PretrainedConfig)
-        self.assertTrue(model_has_state(ov_pipe.decoder.model))
+        # since Auto class for text-to-audio is not implemented in optimum
+        # generate model classes for reference generation
+        vocoder_id = "fxmarty/speecht5-hifigan-tiny"
+        processor = self._get_processor(model_id, model_arch)
+        vocoder = self._get_vocoder(vocoder_id, model_arch)
+        model = self.AUTOMODEL_CLASS.from_pretrained(model_id)
+        inputs = processor(text=text_data, return_tensors="pt")
+        ref_speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+        ref_speech = ref_speech.unsqueeze(0) if ref_speech.dim() == 1 else ref_speech
+        ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, vocoder=vocoder_id, device=OPENVINO_DEVICE)
+        ov_speech = ov_model.generate(input_ids=inputs["input_ids"], speaker_embeddings=speaker_embeddings)
+        self._check_openvino_model_attributes(ov_model, use_cache=True)
         self.assertTrue(torch.allclose(ov_speech, ref_speech, atol=1e-3))
 
         del vocoder
@@ -973,10 +1053,11 @@ def test_compare_to_transformers(self, model_arch):
         gc.collect()
 
 
-class OVModelForPix2StructIntegrationTest(unittest.TestCase):
+class OVModelForPix2StructIntegrationTest(OVSeq2SeqTestMixin):
     SUPPORTED_ARCHITECTURES = ["pix2struct"]
     TASK = "image-to-text"  # is it fine as well with visual-question-answering?
-
+    OVMODEL_CLASS = OVModelForPix2Struct
+    AUTOMODEL_CLASS = Pix2StructForConditionalGeneration
     GENERATION_LENGTH = 100
     SPEEDUP_CACHE = 1.1
 
@@ -991,17 +1072,13 @@ class OVModelForPix2StructIntegrationTest(unittest.TestCase):
     def test_compare_to_transformers(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
         set_seed(SEED)
-        ov_model = OVModelForPix2Struct.from_pretrained(
+        ov_model = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, ov_config=F32_CONFIG, device=OPENVINO_DEVICE
         )
-
-        self.assertIsInstance(ov_model.encoder, OVEncoder)
-        self.assertIsInstance(ov_model.decoder, OVDecoder)
-        self.assertIsInstance(ov_model.decoder_with_past, OVDecoder)
-        self.assertIsInstance(ov_model.config, PretrainedConfig)
+        self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=False)
 
         question = "Who am I?"
-        transformers_model = Pix2StructForConditionalGeneration.from_pretrained(model_id)
+        transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id)
         preprocessor = get_preprocessor(model_id)
         inputs = preprocessor(images=self.IMAGE, text=question, padding=True, return_tensors="pt")
 
@@ -1022,7 +1099,7 @@ def test_compare_to_transformers(self, model_arch):
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_generate_utils(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
-        model = OVModelForPix2Struct.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
+        model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True, device=OPENVINO_DEVICE)
         preprocessor = get_preprocessor(model_id)
         question = "Who am I?"
         inputs = preprocessor(images=self.IMAGE, text=question, return_tensors="pt")
@@ -1040,8 +1117,7 @@ def test_compare_with_and_without_past_key_values(self):
         preprocessor = get_preprocessor(model_id)
         question = "Who am I?"
         inputs = preprocessor(images=self.IMAGE, text=question, return_tensors="pt")
-
-        model_with_pkv = OVModelForPix2Struct.from_pretrained(
+        model_with_pkv = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, use_cache=True, device=OPENVINO_DEVICE
         )
         _ = model_with_pkv.generate(**inputs)  # warmup
@@ -1049,8 +1125,7 @@ def test_compare_with_and_without_past_key_values(self):
         outputs_model_with_pkv = model_with_pkv.generate(
             **inputs, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
         )
-
-        model_without_pkv = OVModelForPix2Struct.from_pretrained(
+        model_without_pkv = self.OVMODEL_CLASS.from_pretrained(
             model_id, export=True, use_cache=False, device=OPENVINO_DEVICE
         )
         _ = model_without_pkv.generate(**inputs)  # warmup
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 7801ba17bb..5e2a2cd0ad 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -75,6 +75,7 @@
     "donut-swin": "optimum-intel-internal-testing/tiny-random-DonutSwinModel",
     "detr": "optimum-intel-internal-testing/tiny-random-DetrModel",
     "electra": "optimum-intel-internal-testing/tiny-random-electra",
+    "encoder-decoder": "optimum-internal-testing/tiny-random-encoder-decoder-gpt2-bert",
     "esm": "optimum-intel-internal-testing/tiny-random-EsmModel",
     "exaone": "optimum-intel-internal-testing/tiny-random-exaone",
     "gemma": "optimum-intel-internal-testing/tiny-random-GemmaForCausalLM",
@@ -84,7 +85,7 @@
     "gemma3": "optimum-intel-internal-testing/tiny-random-gemma3",
     "falcon": "optimum-intel-internal-testing/really-tiny-falcon-testing",
     "falcon-40b": "optimum-intel-internal-testing/tiny-random-falcon-40b",
-    "falcon-mamba": "optimum-intel-internal-testing/tiny-falcon-mamba",
+    "falcon_mamba": "optimum-intel-internal-testing/tiny-falcon-mamba",
     "flaubert": "optimum-intel-internal-testing/tiny-random-flaubert",
     "flux": "optimum-intel-internal-testing/tiny-random-flux",
     "flux-fill": "optimum-intel-internal-testing/tiny-random-flux-fill",
@@ -97,7 +98,8 @@
     "gpt_oss_mxfp4": "optimum-intel-internal-testing/tiny-random-gpt-oss-mxfp4",
     "gptj": "optimum-intel-internal-testing/tiny-random-GPTJModel",
     "granite": "optimum-intel-internal-testing/tiny-random-granite",
-    "granite-moe": "optimum-intel-internal-testing/tiny-random-granite-moe",
+    "granitemoe": "optimum-intel-internal-testing/tiny-random-granite-moe",
+    "helium": "hf-internal-testing/tiny-random-HeliumForCausalLM",
    "hubert": "optimum-intel-internal-testing/tiny-random-HubertModel",
     "ibert": "optimum-intel-internal-testing/tiny-random-ibert",
     "idefics3": "optimum-intel-internal-testing/tiny-random-Idefics3ForConditionalGeneration",
@@ -106,15 +108,17 @@
     "internvl_chat": "optimum-intel-internal-testing/tiny-random-internvl2",
     "jais": "optimum-intel-internal-testing/tiny-random-jais",
     "levit": "optimum-intel-internal-testing/tiny-random-LevitModel",
-    "longt5": "optimum-intel-internal-testing/tiny-random-longt5",
+    "longt5": "hf-internal-testing/tiny-random-LongT5Model",
     "llama": "optimum-intel-internal-testing/tiny-random-LlamaForCausalLM",
     "llama_awq": "optimum-intel-internal-testing/tiny-random-LlamaForCausalLM",
     "llama4": "optimum-intel-internal-testing/tiny-random-llama4",
+    "llama4_text": "trl-internal-testing/tiny-Llama4ForCausalLM",
     "llava": "optimum-intel-internal-testing/tiny-random-llava",
     "llava_next": "optimum-intel-internal-testing/tiny-random-llava-next",
     "llava_next_mistral": "optimum-intel-internal-testing/tiny-random-llava-next-mistral",
     "llava_next_video": "optimum-intel-internal-testing/tiny-random-llava-next-video",
     "m2m_100": "optimum-intel-internal-testing/tiny-random-m2m_100",
+    "olmo2": "hf-internal-testing/tiny-random-Olmo2ForCausalLM",
     "opt": "optimum-intel-internal-testing/tiny-random-OPTModel",
     "opt125m": "optimum-intel-internal-testing/opt-125m",
     "opt_gptq": "optimum-intel-internal-testing/opt-125m-gptq-4bit",
@@ -139,6 +143,7 @@
     "mt5": "optimum-intel-internal-testing/mt5-tiny-random",
     "llava-qwen2": "optimum-intel-internal-testing/tiny-random-nanollava",
     "nanollava_vision_tower": "optimum-intel-internal-testing/tiny-random-siglip",
+    "nemotron": "badaoui/tiny-random-NemotronForCausalLM",
     "nystromformer": "optimum-intel-internal-testing/tiny-random-NystromformerModel",
     "olmo": "optimum-intel-internal-testing/tiny-random-olmo-hf",
     "orion": "optimum-intel-internal-testing/tiny-random-orion",
@@ -149,9 +154,10 @@
     "pix2struct": "optimum-intel-internal-testing/pix2struct-tiny-random",
     "phi": "optimum-intel-internal-testing/tiny-random-PhiForCausalLM",
     "phi3": "optimum-intel-internal-testing/tiny-random-Phi3ForCausalLM",
-    "phi3-moe": "optimum-intel-internal-testing/phi-3.5-moe-tiny-random",
+    "phimoe": "optimum-intel-internal-testing/phi-3.5-moe-tiny-random",
     "phi3_v": "optimum-intel-internal-testing/tiny-random-phi3-vision",
     "phi4mm": "optimum-intel-internal-testing/tiny-random-phi-4-multimodal",
+    "phi4_multimodal": "echarlaix/tiny-random-phi-4-multimodal",
     "poolformer": "optimum-intel-internal-testing/tiny-random-PoolFormerModel",
     "qwen": "optimum-intel-internal-testing/tiny-random-qwen",
     "qwen2": "optimum-intel-internal-testing/tiny-dummy-qwen2",
@@ -166,6 +172,7 @@
     "segformer": "optimum-intel-internal-testing/tiny-random-SegformerModel",
     "sentence-transformers-bert": "optimum-intel-internal-testing/stsb-bert-tiny-safetensors",
     "sam": "optimum-intel-internal-testing/sam-vit-tiny-random",
+    "smollm3": "optimum-internal-testing/tiny-random-SmolLM3ForCausalLM",
     "smolvlm": "optimum-intel-internal-testing/tiny-random-smolvlm2",
     "speecht5": "optimum-intel-internal-testing/tiny-random-SpeechT5ForTextToSpeech",
     "speech_to_text": "optimum-intel-internal-testing/tiny-random-Speech2TextModel",
@@ -330,7 +337,7 @@
     },
     "clip": {"model": 130},
     "mamba": {"model": 386},
-    "falcon-mamba": {"model": 194},
+    "falcon_mamba": {"model": 194},
     "minicpmo": {
         "lm_model": 16,
         "text_embeddings_model": 1,
@@ -467,10 +474,8 @@ def get_num_sdpa(model):
     "baichuan2-13b": "baichuan",
     "chatglm4": "chatglm",
     "codegen2": "codegen",
-    "falcon-mamba": "falcon_mamba",
     "falcon-40b": "falcon",
     "gpt_oss_mxfp4": "gpt_oss",
-    "granite-moe": "granitemoe",
     "llama_awq": "llama",
     "llava_next_mistral": "llava_next",
     "mistral-nemo": "mistral",
@@ -480,7 +485,6 @@ def get_num_sdpa(model):
     "opt_gptq": "opt",
     "perceiver_text": "perceiver",
     "perceiver_vision": "perceiver",
-    "phi3-moe": "phimoe",
     "swin-window": "swin",
     "vit-with-attentions": "vit",
     "vit-with-hidden-states": "vit",