From 7015abd588905b27515c2eecf7a65f0f4c0d8780 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 9 Jun 2025 13:28:27 +0200 Subject: [PATCH 01/12] Support bitnet models --- optimum/exporters/openvino/__main__.py | 20 ++++++++++++++++++++ optimum/exporters/openvino/model_configs.py | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 5c6d4addd6..7838cbeff3 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -258,8 +258,11 @@ def main_export( supported_quant_methods = ["gptq"] if is_openvino_version(">=", "2024.6.0"): supported_quant_methods.append("awq") + if is_openvino_version(">=", "2025.3.0"): + supported_quant_methods.append("bitnet") do_quant_patching = quantization_config and quantization_config["quant_method"] in supported_quant_methods do_gptq_patching = do_quant_patching and quantization_config["quant_method"] == "gptq" + do_bitnet_patching = do_quant_patching and quantization_config["quant_method"] == "bitnet" model_type = config.model_type if model_type not in TasksManager._SUPPORTED_MODEL_TYPE: custom_architecture = True @@ -356,6 +359,21 @@ class StoreAttr(object): return model GPTQQuantizer.post_init_model = post_init_model + if do_bitnet_patching: + from transformers.integrations.bitnet import AutoBitLinear, unpack_weights + import functools + + orig_load_hook = AutoBitLinear.load_hook + + # rewrite load hook to save original weight + @functools.wraps(orig_load_hook) + def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs): + if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype: + self.original_weight = state_dict[prefix + "weight"] + state_dict[prefix + "weight"] = unpack_weights(state_dict[prefix + "weight"], dtype=self.weight.dtype).to(torch.device("meta")) + return state_dict + + AutoBitLinear.load_hook = bitnet_load_hook elif library_name == "diffusers" and is_openvino_version(">=", "2024.6"): _loading_kwargs = {} if variant is None else {"variant": variant} if dtype == "auto" or dtype is None: @@ -531,6 +549,8 @@ class StoreAttr(object): torch.cuda.is_available = orig_cuda_check if do_gptq_patching: GPTQQuantizer.post_init_model = orig_post_init_model + if do_bitnet_patching: + AutoBitLinear.load_hook = orig_load_hook def maybe_convert_tokenizers(library_name: str, output: Path, model=None, preprocessors=None, task=None): diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index b1186b812c..10af2ce54e 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -598,6 +598,24 @@ def patch_model_for_export( return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs) +@register_in_tasks_manager( + "bitnet", + *[ + "feature-extraction", + "feature-extraction-with-past", + "text-generation", + "text-generation-with-past", + "text-classification", + ], + library_name="transformers", +) +class BitnetOpenVINOConfig(LlamaOnnxConfig): + def patch_model_for_export( + self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None + ) -> "ModelPatcher": + return LlamaModelPatcher(self, model, model_kwargs=model_kwargs) + + @register_in_tasks_manager( "exaone", *[ From aded8bc1aa2fe1018a8dfa537bf82d09c4efb9f7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 11 Jun 2025 14:33:56 +0000 Subject: [PATCH 02/12] Apply style fixes --- optimum/exporters/openvino/__main__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 7838cbeff3..f032e08729 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -360,9 +360,10 @@ class StoreAttr(object): GPTQQuantizer.post_init_model = post_init_model if do_bitnet_patching: - from transformers.integrations.bitnet import AutoBitLinear, unpack_weights import functools + from transformers.integrations.bitnet import AutoBitLinear, unpack_weights + orig_load_hook = AutoBitLinear.load_hook # rewrite load hook to save original weight @@ -370,7 +371,9 @@ class StoreAttr(object): def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs): if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype: self.original_weight = state_dict[prefix + "weight"] - state_dict[prefix + "weight"] = unpack_weights(state_dict[prefix + "weight"], dtype=self.weight.dtype).to(torch.device("meta")) + state_dict[prefix + "weight"] = unpack_weights( + state_dict[prefix + "weight"], dtype=self.weight.dtype + ).to(torch.device("meta")) return state_dict AutoBitLinear.load_hook = bitnet_load_hook From 5c526a2392f32d5ed01d1c22a268ba5ed13ec38f Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 12 Aug 2025 16:53:02 +0200 Subject: [PATCH 03/12] Fix conversion --- optimum/exporters/openvino/__main__.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index f032e08729..ead8745d53 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -360,20 +360,16 @@ class StoreAttr(object): GPTQQuantizer.post_init_model = post_init_model if do_bitnet_patching: - import functools - - from transformers.integrations.bitnet import AutoBitLinear, unpack_weights + from transformers.integrations.bitnet import AutoBitLinear orig_load_hook = AutoBitLinear.load_hook # rewrite load hook to save original weight - @functools.wraps(orig_load_hook) def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs): if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype: self.original_weight = state_dict[prefix + "weight"] - state_dict[prefix + "weight"] = unpack_weights( - state_dict[prefix + "weight"], dtype=self.weight.dtype - ).to(torch.device("meta")) + w_shape = self.original_weight.shape + state_dict[prefix + "weight"] = torch.empty((w_shape[0] * 4, w_shape[1]), dtype=self.weight.dtype, device="meta") return state_dict AutoBitLinear.load_hook = bitnet_load_hook From 46ed11303ab651e772607225e3babbf07b80e506 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 12 Aug 2025 16:46:53 +0200 Subject: [PATCH 04/12] Update optimum/exporters/openvino/model_configs.py --- optimum/exporters/openvino/model_configs.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 10af2ce54e..013becd867 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -601,11 +601,8 @@ def patch_model_for_export( @register_in_tasks_manager( "bitnet", *[ - "feature-extraction", - "feature-extraction-with-past", "text-generation", "text-generation-with-past", - "text-classification", ], library_name="transformers", ) From eab2dbd5f6397e293ea20384c6793d37b59c1b1b Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Wed, 13 Aug 2025 18:45:33 +0200 Subject: [PATCH 05/12] Fix patcher name --- optimum/exporters/openvino/model_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 013becd867..d71db57557 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -610,7 +610,7 @@ class BitnetOpenVINOConfig(LlamaOnnxConfig): def patch_model_for_export( self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None ) -> "ModelPatcher": - return LlamaModelPatcher(self, model, model_kwargs=model_kwargs) + return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs) @register_in_tasks_manager( From cd3bbb328cc1c36b37b3d9055c362ce3b5073c4a Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 19 Aug 2025 18:53:33 +0200 Subject: [PATCH 06/12] Add test --- tests/openvino/test_modeling.py | 4 ++++ tests/openvino/utils_tests.py | 1 + 2 files changed, 5 insertions(+) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index a3c8247455..03f24bf5ea 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -137,6 +137,7 @@ ) from optimum.utils.testing_utils import require_diffusers +torch.compile = lambda func: func # Mock torch.compile to avoid compilation errors in tests TENSOR_ALIAS_TO_TYPE = { "pt": torch.Tensor, @@ -1185,6 +1186,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">=", "4.53.0"): SUPPORTED_ARCHITECTURES += ("arcee",) + if is_openvino_version(">=", "2025.3.0"): + SUPPORTED_ARCHITECTURES += ("bitnet",) if is_transformers_version(">=", "4.54.0"): SUPPORTED_ARCHITECTURES += ("ernie4_5",) @@ -1278,6 +1281,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "falcon-mamba": 0, "arcee": 2, "ernie4_5": 2, + "bitnet": 6, } # TODO: remove gptq/awq from here diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 8d8ba3e098..ed5a3fe7e6 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -38,6 +38,7 @@ "baichuan2-13b": "katuni4ka/tiny-random-baichuan2-13b", "bigbird_pegasus": "hf-internal-testing/tiny-random-bigbird_pegasus", "biogpt": "hf-tiny-model-private/tiny-random-BioGptForCausalLM", + "bitnet": "mvafin/tiny-bitnet", "blenderbot-small": "hf-internal-testing/tiny-random-BlenderbotModel", "blenderbot": "hf-internal-testing/tiny-random-BlenderbotModel", "bloom": "hf-internal-testing/tiny-random-BloomModel", From 31981ea8b4d8abef39627cd8cd8ae637ef5985e9 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Wed, 20 Aug 2025 11:21:48 +0200 Subject: [PATCH 07/12] Fix style --- optimum/exporters/openvino/__main__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index ead8745d53..8db5b9e65d 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -369,7 +369,9 @@ def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs): if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype: self.original_weight = state_dict[prefix + "weight"] w_shape = self.original_weight.shape - state_dict[prefix + "weight"] = torch.empty((w_shape[0] * 4, w_shape[1]), dtype=self.weight.dtype, device="meta") + state_dict[prefix + "weight"] = torch.empty( + (w_shape[0] * 4, w_shape[1]), dtype=self.weight.dtype, device="meta" + ) return state_dict AutoBitLinear.load_hook = bitnet_load_hook From 84174006abab1e0f83c30ff397007e71debdda8e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 20 Aug 2025 12:11:29 +0000 Subject: [PATCH 08/12] Apply style fixes --- tests/openvino/test_modeling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index becf8bae1a..784afe5050 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -137,6 +137,7 @@ ) from optimum.utils.testing_utils import require_diffusers + torch.compile = lambda func: func # Mock torch.compile to avoid compilation errors in tests TENSOR_ALIAS_TO_TYPE = { From 6f270d6de8a56fbd4b81d3e94040d9793bcaa54b Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Wed, 22 Oct 2025 12:08:38 +0200 Subject: [PATCH 09/12] Return test after merge --- tests/openvino/test_decoder.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 45a9f59ca6..9096137b7e 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -25,10 +25,11 @@ from optimum.intel.pipelines import pipeline as optimum_pipeline from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version - if is_transformers_version(">=", "4.55"): from transformers import Mxfp4Config +torch.compile = lambda func: func # Mock torch.compile to avoid compilation errors in tests + SEED = 42 F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"} TENSOR_ALIAS_TO_TYPE = {"pt": torch.Tensor, "np": np.ndarray} @@ -120,6 +121,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">=", "4.53.0"): SUPPORTED_ARCHITECTURES += ("arcee",) + if is_openvino_version(">=", "2025.3.0"): + SUPPORTED_ARCHITECTURES += ("bitnet",) if is_transformers_version(">=", "4.54.0"): # remote code models differs after transformers v4.54 @@ -216,6 +219,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "mamba": 0, "falcon-mamba": 0, "arcee": 2, + "bitnet": 6, } # TODO: remove gptq/awq from here From b4bb1ceefaa81656e5b39aaa7ba743ed1d8894b4 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Fri, 24 Oct 2025 10:34:40 +0200 Subject: [PATCH 10/12] Apply suggestions from code review Co-authored-by: Nikita Savelyev --- optimum/exporters/openvino/__main__.py | 2 +- tests/openvino/test_decoder.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 8209e52492..5f55bbbce4 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -272,7 +272,7 @@ def main_export( supported_quant_methods = ["gptq"] if is_openvino_version(">=", "2024.6.0"): supported_quant_methods.append("awq") - if is_openvino_version(">=", "2025.3.0"): + if is_openvino_version(">=", "2025.4.0"): supported_quant_methods.append("bitnet") do_quant_patching = quant_method in supported_quant_methods do_gptq_patching = quant_method == "gptq" diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 9096137b7e..8f375846ab 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -121,7 +121,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">=", "4.53.0"): SUPPORTED_ARCHITECTURES += ("arcee",) - if is_openvino_version(">=", "2025.3.0"): + if is_openvino_version(">=", "2025.4.0"): SUPPORTED_ARCHITECTURES += ("bitnet",) if is_transformers_version(">=", "4.54.0"): From 73bdf8b73c2f9d34b28a1422f9b446761b0ad04b Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Thu, 30 Oct 2025 12:44:36 +0100 Subject: [PATCH 11/12] Move model --- tests/openvino/utils_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 3e9f343b3e..c14b414e18 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -49,7 +49,7 @@ "baichuan2-13b": "optimum-intel-internal-testing/tiny-random-baichuan2-13b", "bigbird_pegasus": "optimum-intel-internal-testing/tiny-random-bigbird_pegasus", "biogpt": "optimum-intel-internal-testing/tiny-random-BioGptForCausalLM", - "bitnet": "mvafin/tiny-bitnet", + "bitnet": "optimum-intel-internal-testing/tiny-random-bitnet", "blenderbot-small": "optimum-intel-internal-testing/tiny-random-BlenderbotModel", "blenderbot": "optimum-intel-internal-testing/tiny-random-BlenderbotModel", "bloom": "optimum-intel-internal-testing/tiny-random-BloomModel", From 27e82648ae3c73d22f4b059191a9136b4cbddcab Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Thu, 30 Oct 2025 13:23:19 +0100 Subject: [PATCH 12/12] Fix style --- tests/openvino/test_decoder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index cd06aa78eb..581fc4cb97 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -30,6 +30,7 @@ torch.compile = lambda func: func # Mock torch.compile to avoid compilation errors in tests + class OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES = ( "bart",