From 7015abd588905b27515c2eecf7a65f0f4c0d8780 Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Mon, 9 Jun 2025 13:28:27 +0200
Subject: [PATCH 01/12] Support bitnet models

---
 optimum/exporters/openvino/__main__.py      | 20 ++++++++++++++++++++
 optimum/exporters/openvino/model_configs.py | 18 ++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 5c6d4addd6..7838cbeff3 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -258,8 +258,11 @@ def main_export(
         supported_quant_methods = ["gptq"]
         if is_openvino_version(">=", "2024.6.0"):
             supported_quant_methods.append("awq")
+        if is_openvino_version(">=", "2025.3.0"):
+            supported_quant_methods.append("bitnet")
         do_quant_patching = quantization_config and quantization_config["quant_method"] in supported_quant_methods
         do_gptq_patching = do_quant_patching and quantization_config["quant_method"] == "gptq"
+        do_bitnet_patching = do_quant_patching and quantization_config["quant_method"] == "bitnet"
         model_type = config.model_type
         if model_type not in TasksManager._SUPPORTED_MODEL_TYPE:
             custom_architecture = True
@@ -356,6 +359,21 @@ class StoreAttr(object):
                     return model
 
                 GPTQQuantizer.post_init_model = post_init_model
+            if do_bitnet_patching:
+                from transformers.integrations.bitnet import AutoBitLinear, unpack_weights
+                import functools
+
+                orig_load_hook = AutoBitLinear.load_hook
+
+                # rewrite load hook to save original weight
+                @functools.wraps(orig_load_hook)
+                def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs):
+                    if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype:
+                        self.original_weight = state_dict[prefix + "weight"]
+                        state_dict[prefix + "weight"] = unpack_weights(state_dict[prefix + "weight"], dtype=self.weight.dtype).to(torch.device("meta"))
+                    return state_dict
+
+                AutoBitLinear.load_hook = bitnet_load_hook
     elif library_name == "diffusers" and is_openvino_version(">=", "2024.6"):
         _loading_kwargs = {} if variant is None else {"variant": variant}
         if dtype == "auto" or dtype is None:
@@ -531,6 +549,8 @@ class StoreAttr(object):
             torch.cuda.is_available = orig_cuda_check
             if do_gptq_patching:
                 GPTQQuantizer.post_init_model = orig_post_init_model
+            if do_bitnet_patching:
+                AutoBitLinear.load_hook = orig_load_hook
 
 
 def maybe_convert_tokenizers(library_name: str, output: Path, model=None, preprocessors=None, task=None):
diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index b1186b812c..10af2ce54e 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -598,6 +598,24 @@ def patch_model_for_export(
         return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
 
 
+@register_in_tasks_manager(
+    "bitnet",
+    *[
+        "feature-extraction",
+        "feature-extraction-with-past",
+        "text-generation",
+        "text-generation-with-past",
+        "text-classification",
+    ],
+    library_name="transformers",
+)
+class BitnetOpenVINOConfig(LlamaOnnxConfig):
+    def patch_model_for_export(
+        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
+    ) -> "ModelPatcher":
+        return LlamaModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
 @register_in_tasks_manager(
     "exaone",
     *[

From aded8bc1aa2fe1018a8dfa537bf82d09c4efb9f7 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Wed, 11 Jun 2025 14:33:56 +0000
Subject: [PATCH 02/12] Apply style fixes

---
 optimum/exporters/openvino/__main__.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 7838cbeff3..f032e08729 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -360,9 +360,10 @@ class StoreAttr(object):
 
                 GPTQQuantizer.post_init_model = post_init_model
             if do_bitnet_patching:
-                from transformers.integrations.bitnet import AutoBitLinear, unpack_weights
                 import functools
 
+                from transformers.integrations.bitnet import AutoBitLinear, unpack_weights
+
                 orig_load_hook = AutoBitLinear.load_hook
 
                 # rewrite load hook to save original weight
@@ -370,7 +371,9 @@ class StoreAttr(object):
                 def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs):
                     if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype:
                         self.original_weight = state_dict[prefix + "weight"]
-                        state_dict[prefix + "weight"] = unpack_weights(state_dict[prefix + "weight"], dtype=self.weight.dtype).to(torch.device("meta"))
+                        state_dict[prefix + "weight"] = unpack_weights(
+                            state_dict[prefix + "weight"], dtype=self.weight.dtype
+                        ).to(torch.device("meta"))
                     return state_dict
 
                 AutoBitLinear.load_hook = bitnet_load_hook

From 5c526a2392f32d5ed01d1c22a268ba5ed13ec38f Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Tue, 12 Aug 2025 16:53:02 +0200
Subject: [PATCH 03/12] Fix conversion: use meta placeholder for unpacked weight

---
 optimum/exporters/openvino/__main__.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index f032e08729..ead8745d53 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -360,20 +360,16 @@ class StoreAttr(object):
 
                 GPTQQuantizer.post_init_model = post_init_model
             if do_bitnet_patching:
-                import functools
-
-                from transformers.integrations.bitnet import AutoBitLinear, unpack_weights
+                from transformers.integrations.bitnet import AutoBitLinear
 
                 orig_load_hook = AutoBitLinear.load_hook
 
                 # rewrite load hook to save original weight
-                @functools.wraps(orig_load_hook)
                 def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs):
                     if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype:
                         self.original_weight = state_dict[prefix + "weight"]
-                        state_dict[prefix + "weight"] = unpack_weights(
-                            state_dict[prefix + "weight"], dtype=self.weight.dtype
-                        ).to(torch.device("meta"))
+                        w_shape = self.original_weight.shape
+                        state_dict[prefix + "weight"] = torch.empty((w_shape[0] * 4, w_shape[1]), dtype=self.weight.dtype, device="meta")
                     return state_dict
 
                 AutoBitLinear.load_hook = bitnet_load_hook

From 46ed11303ab651e772607225e3babbf07b80e506 Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Tue, 12 Aug 2025 16:46:53 +0200
Subject: [PATCH 04/12] Restrict bitnet export to text-generation tasks

---
 optimum/exporters/openvino/model_configs.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 10af2ce54e..013becd867 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -601,11 +601,8 @@ def patch_model_for_export(
 @register_in_tasks_manager(
     "bitnet",
     *[
-        "feature-extraction",
-        "feature-extraction-with-past",
         "text-generation",
         "text-generation-with-past",
-        "text-classification",
     ],
     library_name="transformers",
 )

From eab2dbd5f6397e293ea20384c6793d37b59c1b1b Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Wed, 13 Aug 2025 18:45:33 +0200
Subject: [PATCH 05/12] Fix patcher name

---
 optimum/exporters/openvino/model_configs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 013becd867..d71db57557 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -610,7 +610,7 @@ class BitnetOpenVINOConfig(LlamaOnnxConfig):
     def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
-        return LlamaModelPatcher(self, model, model_kwargs=model_kwargs)
+        return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
 
 
 @register_in_tasks_manager(

From cd3bbb328cc1c36b37b3d9055c362ce3b5073c4a Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Tue, 19 Aug 2025 18:53:33 +0200
Subject: [PATCH 06/12] Add bitnet test

---
 tests/openvino/test_modeling.py | 4 ++++
 tests/openvino/utils_tests.py   | 1 +
 2 files changed, 5 insertions(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index a3c8247455..03f24bf5ea 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -137,6 +137,7 @@
 )
 from optimum.utils.testing_utils import require_diffusers
 
+torch.compile = lambda func: func  # Mock torch.compile to avoid compilation errors in tests
 
 TENSOR_ALIAS_TO_TYPE = {
     "pt": torch.Tensor,
@@ -1185,6 +1186,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
 
     if is_transformers_version(">=", "4.53.0"):
         SUPPORTED_ARCHITECTURES += ("arcee",)
+        if is_openvino_version(">=", "2025.3.0"):
+            SUPPORTED_ARCHITECTURES += ("bitnet",)
 
     if is_transformers_version(">=", "4.54.0"):
         SUPPORTED_ARCHITECTURES += ("ernie4_5",)
@@ -1278,6 +1281,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "falcon-mamba": 0,
         "arcee": 2,
         "ernie4_5": 2,
+        "bitnet": 6,
     }
 
     # TODO: remove gptq/awq from here
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 8d8ba3e098..ed5a3fe7e6 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -38,6 +38,7 @@
     "baichuan2-13b": "katuni4ka/tiny-random-baichuan2-13b",
     "bigbird_pegasus": "hf-internal-testing/tiny-random-bigbird_pegasus",
     "biogpt": "hf-tiny-model-private/tiny-random-BioGptForCausalLM",
+    "bitnet": "mvafin/tiny-bitnet",
     "blenderbot-small": "hf-internal-testing/tiny-random-BlenderbotModel",
     "blenderbot": "hf-internal-testing/tiny-random-BlenderbotModel",
     "bloom": "hf-internal-testing/tiny-random-BloomModel",

From 31981ea8b4d8abef39627cd8cd8ae637ef5985e9 Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Wed, 20 Aug 2025 11:21:48 +0200
Subject: [PATCH 07/12] Fix style

---
 optimum/exporters/openvino/__main__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index ead8745d53..8db5b9e65d 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -369,7 +369,9 @@ def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs):
                     if (prefix + "weight") in state_dict and state_dict[prefix + "weight"].dtype != self.weight.dtype:
                         self.original_weight = state_dict[prefix + "weight"]
                         w_shape = self.original_weight.shape
-                        state_dict[prefix + "weight"] = torch.empty((w_shape[0] * 4, w_shape[1]), dtype=self.weight.dtype, device="meta")
+                        state_dict[prefix + "weight"] = torch.empty(
+                            (w_shape[0] * 4, w_shape[1]), dtype=self.weight.dtype, device="meta"
+                        )
                     return state_dict
 
                 AutoBitLinear.load_hook = bitnet_load_hook

From 84174006abab1e0f83c30ff397007e71debdda8e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Wed, 20 Aug 2025 12:11:29 +0000
Subject: [PATCH 08/12] Apply style fixes

---
 tests/openvino/test_modeling.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index becf8bae1a..784afe5050 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -137,6 +137,7 @@
 )
 from optimum.utils.testing_utils import require_diffusers
 
+
 torch.compile = lambda func: func  # Mock torch.compile to avoid compilation errors in tests
 
 TENSOR_ALIAS_TO_TYPE = {

From 6f270d6de8a56fbd4b81d3e94040d9793bcaa54b Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Wed, 22 Oct 2025 12:08:38 +0200
Subject: [PATCH 09/12] Restore bitnet test after merge

---
 tests/openvino/test_decoder.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py
index 45a9f59ca6..9096137b7e 100644
--- a/tests/openvino/test_decoder.py
+++ b/tests/openvino/test_decoder.py
@@ -25,10 +25,11 @@
 from optimum.intel.pipelines import pipeline as optimum_pipeline
 from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
 
-
 if is_transformers_version(">=", "4.55"):
     from transformers import Mxfp4Config
 
+torch.compile = lambda func: func  # Mock torch.compile to avoid compilation errors in tests
+
 SEED = 42
 F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"}
 TENSOR_ALIAS_TO_TYPE = {"pt": torch.Tensor, "np": np.ndarray}
@@ -120,6 +121,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
 
     if is_transformers_version(">=", "4.53.0"):
         SUPPORTED_ARCHITECTURES += ("arcee",)
+        if is_openvino_version(">=", "2025.3.0"):
+            SUPPORTED_ARCHITECTURES += ("bitnet",)
 
     if is_transformers_version(">=", "4.54.0"):
         # remote code models differs after transformers v4.54
@@ -216,6 +219,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "mamba": 0,
         "falcon-mamba": 0,
         "arcee": 2,
+        "bitnet": 6,
     }
 
     # TODO: remove gptq/awq from here

From b4bb1ceefaa81656e5b39aaa7ba743ed1d8894b4 Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Fri, 24 Oct 2025 10:34:40 +0200
Subject: [PATCH 10/12] Apply suggestions from code review

Co-authored-by: Nikita Savelyev <nikita.savelyev@intel.com>
---
 optimum/exporters/openvino/__main__.py | 2 +-
 tests/openvino/test_decoder.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 8209e52492..5f55bbbce4 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -272,7 +272,7 @@ def main_export(
         supported_quant_methods = ["gptq"]
         if is_openvino_version(">=", "2024.6.0"):
             supported_quant_methods.append("awq")
-        if is_openvino_version(">=", "2025.3.0"):
+        if is_openvino_version(">=", "2025.4.0"):
             supported_quant_methods.append("bitnet")
         do_quant_patching = quant_method in supported_quant_methods
         do_gptq_patching = quant_method == "gptq"
diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py
index 9096137b7e..8f375846ab 100644
--- a/tests/openvino/test_decoder.py
+++ b/tests/openvino/test_decoder.py
@@ -121,7 +121,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
 
     if is_transformers_version(">=", "4.53.0"):
         SUPPORTED_ARCHITECTURES += ("arcee",)
-        if is_openvino_version(">=", "2025.3.0"):
+        if is_openvino_version(">=", "2025.4.0"):
             SUPPORTED_ARCHITECTURES += ("bitnet",)
 
     if is_transformers_version(">=", "4.54.0"):

From 73bdf8b73c2f9d34b28a1422f9b446761b0ad04b Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Thu, 30 Oct 2025 12:44:36 +0100
Subject: [PATCH 11/12] Move bitnet test model to optimum-intel-internal-testing

---
 tests/openvino/utils_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 3e9f343b3e..c14b414e18 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -49,7 +49,7 @@
     "baichuan2-13b": "optimum-intel-internal-testing/tiny-random-baichuan2-13b",
     "bigbird_pegasus": "optimum-intel-internal-testing/tiny-random-bigbird_pegasus",
     "biogpt": "optimum-intel-internal-testing/tiny-random-BioGptForCausalLM",
-    "bitnet": "mvafin/tiny-bitnet",
+    "bitnet": "optimum-intel-internal-testing/tiny-random-bitnet",
     "blenderbot-small": "optimum-intel-internal-testing/tiny-random-BlenderbotModel",
     "blenderbot": "optimum-intel-internal-testing/tiny-random-BlenderbotModel",
     "bloom": "optimum-intel-internal-testing/tiny-random-BloomModel",

From 27e82648ae3c73d22f4b059191a9136b4cbddcab Mon Sep 17 00:00:00 2001
From: Maxim Vafin <maxim.vafin@intel.com>
Date: Thu, 30 Oct 2025 13:23:19 +0100
Subject: [PATCH 12/12] Fix style

---
 tests/openvino/test_decoder.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py
index cd06aa78eb..581fc4cb97 100644
--- a/tests/openvino/test_decoder.py
+++ b/tests/openvino/test_decoder.py
@@ -30,6 +30,7 @@
 
 torch.compile = lambda func: func  # Mock torch.compile to avoid compilation errors in tests
 
+
 class OVModelForCausalLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
         "bart",