
Commit a682d15

Fixes dummy inputs for summarization, feature_extraction tasks (#303)

* issue with latest transformers
* fix
* fix tasks

1 parent 54c0b00 · commit a682d15
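The tasks touched here are exercised through `get_untrained_model_with_inputs`, which returns an untrained copy of a model together with the task-specific dummy inputs this commit fixes. A minimal sketch of that validation pattern, assuming onnx_diagnostic and transformers are installed (the model id and dictionary keys are taken from the tests below):

```python
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs

# Build an untrained copy of the model plus dummy inputs for its task.
data = get_untrained_model_with_inputs("facebook/bart-base", add_second_input=True)
model, inputs = data["model"], data["inputs"]

model(**inputs)           # the dummy inputs must run through the model
model(**data["inputs2"])  # a second shape set, used to validate dynamic dimensions
```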

File tree: 8 files changed, +198 -208 lines changed


CHANGELOGS.rst

Lines changed: 1 addition & 0 deletions
```diff
@@ -4,6 +4,7 @@ Change Logs
 0.8.2
 +++++
 
+* :pr:`303`: fix inputs for summarization, feature extraction tasks
 * :pr:`302`: adds helpers to analyse onnxruntime profiling
 * :pr:`297`: experiment around a higher ops ``loop_for``
 * :pr:`292`, :pr:`293`, :pr:`294`, :pr:`295`: new patches for Qwen models
```

_unittests/ut_tasks/test_tasks.py

Lines changed: 1 addition & 50 deletions
```diff
@@ -1,12 +1,7 @@
 import os
 import unittest
 import torch
-from onnx_diagnostic.ext_test_case import (
-    ExtTestCase,
-    hide_stdout,
-    has_transformers,
-    requires_transformers,
-)
+from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, has_transformers
 from onnx_diagnostic.helpers.torch_helper import to_any, torch_deepcopy
 from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
 from onnx_diagnostic.torch_export_patches import torch_export_patches
@@ -216,50 +211,6 @@ def test_fill_mask(self):
             model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
         )
 
-    @hide_stdout()
-    @requires_transformers("4.53.99")
-    def test_feature_extraction_bart_base(self):
-        mid = "facebook/bart-base"
-        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
-        self.assertEqual(data["task"], "feature-extraction")
-        self.assertIn((data["size"], data["n_weights"]), [(557681664, 139420416)])
-        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
-        model(**torch_deepcopy(inputs))
-        model(**data["inputs2"])
-        with torch_export_patches(patch_transformers=True, verbose=10):
-            torch.export.export(
-                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
-            )
-
-    @hide_stdout()
-    def test_feature_extraction_tiny_bart(self):
-        mid = "hf-tiny-model-private/tiny-random-PLBartForConditionalGeneration"
-        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
-        self.assertEqual(data["task"], "text2text-generation")
-        self.assertIn((data["size"], data["n_weights"]), [(3243392, 810848)])
-        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
-        model(**inputs)
-        model(**data["inputs2"])
-        with torch_export_patches(patch_transformers=True, verbose=10):
-            torch.export.export(
-                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
-            )
-
-    @requires_transformers("4.51.999")
-    @hide_stdout()
-    def test_summarization(self):
-        mid = "facebook/bart-large-cnn"
-        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
-        self.assertEqual(data["task"], "summarization")
-        self.assertIn((data["size"], data["n_weights"]), [(1625161728, 406290432)])
-        model, inputs, _ds = data["model"], data["inputs"], data["dynamic_shapes"]
-        model(**inputs)
-        model(**data["inputs2"])
-        # with torch_export_patches(patch_transformers=True, verbose=10):
-        #     torch.export.export(
-        #         model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
-        #     )
-
     @hide_stdout()
     def test_text_classification(self):
         mid = "Intel/bert-base-uncased-mrpc"
```
Lines changed: 55 additions & 0 deletions (new file)

```diff
@@ -0,0 +1,55 @@
+import unittest
+import torch
+from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, requires_transformers
+from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
+from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
+from onnx_diagnostic.torch_export_patches import torch_export_patches
+from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
+
+
+class TestTasksFeatureExtration(ExtTestCase):
+    @hide_stdout()
+    @requires_transformers("4.53.99")
+    def test_feature_extraction_bart_base(self):
+        """
+        data=dict(
+            input_ids:T7s2x12,
+            attention_mask:T7s2x12,
+            past_key_values:EncoderDecoderCache(
+                self_attention_cache=DynamicCache(
+                    key_cache=#6[T1s2x12x30x64,...
+                    value_cache=#6[T1s2x12x30x64,...
+                cross_attention_cache=DynamicCache(
+                    key_cache=#6[T1s2x12x4x64
+                    value_cache=#6[T1s2x12x4x64
+        """
+        mid = "facebook/bart-base"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "feature-extraction")
+        self.assertIn((data["size"], data["n_weights"]), [(409583616, 102395904)])
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        print(f"-- {self.string_type(inputs, with_shape=True)}")
+        model(**torch_deepcopy(inputs))
+        model(**data["inputs2"])
+        with torch_export_patches(patch_transformers=True, verbose=10):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+
+    @hide_stdout()
+    def test_feature_extraction_tiny_bart(self):
+        mid = "hf-tiny-model-private/tiny-random-PLBartForConditionalGeneration"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "text2text-generation")
+        self.assertIn((data["size"], data["n_weights"]), [(3243392, 810848)])
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**inputs)
+        model(**data["inputs2"])
+        with torch_export_patches(patch_transformers=True, verbose=10):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
```
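The docstring above uses the project's `string_type` shorthand: `T7s2x12` is an int64 tensor of shape 2x12, `T1s...` a float32 tensor, and `#6[...]` a list of six tensors (the dtype codes follow ONNX element types: 1 is float, 7 is int64). A sketch of how such an `EncoderDecoderCache` can be assembled with the cache helpers used elsewhere in this commit, assuming `make_dynamic_cache` takes a list of `(key, value)` pairs as in `try_tasks.py`:

```python
import torch
from onnx_diagnostic.helpers.cache_helper import (
    make_dynamic_cache,
    make_encoder_decoder_cache,
)

batch, heads, head_dim, layers = 2, 12, 64, 6
past_length, past_length2 = 30, 4  # self- and cross-attention past lengths

past_key_values = make_encoder_decoder_cache(
    # self-attention cache: one (key, value) pair per layer, shape 2x12x30x64
    make_dynamic_cache(
        [
            (
                torch.randn(batch, heads, past_length, head_dim),
                torch.randn(batch, heads, past_length, head_dim),
            )
            for _ in range(layers)
        ]
    ),
    # cross-attention cache: the sequence axis is the encoder side, 2x12x4x64
    make_dynamic_cache(
        [
            (
                torch.randn(batch, heads, past_length2, head_dim),
                torch.randn(batch, heads, past_length2, head_dim),
            )
            for _ in range(layers)
        ]
    ),
)
```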
Lines changed: 25 additions & 0 deletions (new file)

```diff
@@ -0,0 +1,25 @@
+import unittest
+from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, requires_transformers
+from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
+
+
+class TestTasksSummarization(ExtTestCase):
+    @requires_transformers("4.51.999")
+    @hide_stdout()
+    def test_summarization(self):
+        mid = "facebook/bart-large-cnn"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "summarization")
+        self.assertIn((data["size"], data["n_weights"]), [(1427701760, 356925440)])
+        model, inputs, _ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        print(f"-- {mid}: {self.string_type(inputs, with_shape=True)}")
+        model(**inputs)
+        model(**data["inputs2"])
+        # with torch_export_patches(patch_transformers=True, verbose=10):
+        #     torch.export.export(
+        #         model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+        #     )
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
```

_unittests/ut_tasks/try_tasks.py

Lines changed: 25 additions & 6 deletions
```diff
@@ -530,7 +530,22 @@ def test_fill_mask(self):
         print("-- outputs", string_type(output, with_shape=True, with_min_max=True))
 
     @never_test()
-    def test_feature_extraction(self):
+    def test_feature_extraction_generate(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k feature_ex
+        # https://huggingface.co/google-bert/bert-base-multilingual-cased
+
+        from transformers import BartTokenizer, BartModel
+
+        tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
+        model = BartModel.from_pretrained("facebook/bart-base")
+        text = "Replace me by any text you'd like."
+        encoded_input = tokenizer(text, return_tensors="pt")
+        print(f"-- {string_type(encoded_input, with_shape=True)}")
+        outputs = model(**encoded_input)
+        print(f"-- {string_type(outputs, with_shape=True)}")
+
+    @never_test()
+    def test_feature_extraction_check(self):
         # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k feature_ex
         # https://huggingface.co/google-bert/bert-base-multilingual-cased
 
@@ -541,10 +556,14 @@ def test_feature_extraction(self):
         text = "Replace me by any text you'd like."
         encoded_input = tokenizer(text, return_tensors="pt")
         sequence_length, sequence_length2 = 30, 4
-        sequence_length = 3
-        batch_size, encoder_attention_heads, encoder_ffn_dim = 1, 12, 64
-        batch_size, decoder_attention_heads, decoder_ffn_dim = 1, 12, 64
+        # sequence_length = 3
+        batch_size, encoder_attention_heads, encoder_ffn_dim = 2, 12, 64
+        __________, decoder_attention_heads, decoder_ffn_dim = 2, 12, 64
         num_hidden_layers = 6
+        encoded_input["input_ids"] = encoded_input["input_ids"].expand((batch_size, -1))
+        encoded_input["attention_mask"] = encoded_input["attention_mask"].expand(
+            (batch_size, -1)
+        )
         encoded_input["past_key_values"] = make_encoder_decoder_cache(
             make_dynamic_cache(
                 [
@@ -586,9 +605,9 @@ def test_feature_extraction(self):
             ),
         )
         print()
-        print("-- inputs", string_type(encoded_input, with_shape=True, with_min_max=True))
+        print("-- inputs", string_type(encoded_input, with_shape=True))
         output = model(**encoded_input)
-        print("-- outputs", string_type(output, with_shape=True, with_min_max=True))
+        print("-- outputs", string_type(output, with_shape=True))
 
     @never_test()
     def test_text_classification(self):
```
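The new `expand` calls in the hunk above grow the batch axis of the tokenizer output from 1 to `batch_size` without copying data; `-1` keeps the sequence axis unchanged. A small, self-contained illustration of that tensor trick:

```python
import torch

input_ids = torch.tensor([[101, 2023, 2003, 102]])  # shape (1, 4), batch of one
batched = input_ids.expand((2, -1))  # shape (2, 4): a broadcast view, no copy
assert batched.shape == (2, 4)
assert batched.data_ptr() == input_ids.data_ptr()  # same underlying storage
```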

_unittests/ut_torch_export_patches/test_patch_torch.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -341,7 +341,7 @@ def forward(self, x, ind1, ind2):
             self.assertIn("export 0/1 specialized due to hint of 1 for dimension", str(e))
 
         dynamic_shapes = use_dyn_not_str(dynamic_string, torch.export.Dim.AUTO)
-        if has_torch("2.9"):
+        if has_torch("2.9") and not has_torch("2.9.99"):
             with self.subTest(
                 name="expected shape should be broadcastable to (>= 2.9)",
                 dynamic_shapes=dynamic_shapes,
@@ -352,6 +352,9 @@ def forward(self, x, ind1, ind2):
                     raise AssertionError("torch fixed that case")
                 except RuntimeError as e:
                     self.assertIn("expected shape should be broadcastable to", str(e))
+        elif has_torch("2.9.99"):
+            with torch.fx.experimental._config.patch(backed_size_oblivious=True):
+                torch.export.export(model, inputs, dynamic_shapes=dynamic_shapes)
 
         if not has_torch("2.9"):
             with self.subTest(
```
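On torch builds newer than 2.9 the test now exports under `backed_size_oblivious`, a private flag toggled through the `patch` context manager that `torch.fx.experimental._config` exposes. A minimal sketch of the pattern, assuming a PyTorch version recent enough to carry the flag; the toy module is hypothetical:

```python
import torch
import torch.fx.experimental._config as fx_config


class Toy(torch.nn.Module):
    def forward(self, x):
        return x * 2


inputs = (torch.randn(3, 4),)
dynamic_shapes = ({0: torch.export.Dim.AUTO},)

# backed_size_oblivious relaxes 0/1 specialization of backed symbolic sizes
with fx_config.patch(backed_size_oblivious=True):
    ep = torch.export.export(Toy(), inputs, dynamic_shapes=dynamic_shapes)
print(ep)
```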

onnx_diagnostic/tasks/feature_extraction.py

Lines changed: 15 additions & 14 deletions
```diff
@@ -1,10 +1,6 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
-from ..helpers.config_helper import (
-    update_config,
-    check_hasattr,
-    default_num_hidden_layers as nhl,
-)
+from ..helpers.config_helper import update_config, check_hasattr
 from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 
 
@@ -13,8 +9,9 @@
 
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
-    check_hasattr(config, "num_hidden_layers")
-    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, nhl()))
+    check_hasattr(config, "vocab_size")
+    # Bart architecture does not like too much that the number of layers is changed.
+    kwargs = dict(vocab_size=2056)
     update_config(config, kwargs)
     return kwargs
 
@@ -25,7 +22,8 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
-    sequence_length2: int = 3,
+    past_length: int = 30,
+    past_length2: int = 4,
     decoder_attention_heads: Optional[int] = None,
     encoder_attention_heads: Optional[int] = None,
     encoder_ffn_dim: Optional[int] = None,
@@ -73,13 +71,13 @@ def get_inputs(
                 torch.randn(
                     batch_size,
                     encoder_attention_heads,
-                    sequence_length,
+                    past_length,
                     encoder_ffn_dim,
                 ),
                 torch.randn(
                     batch_size,
                     encoder_attention_heads,
-                    sequence_length,
+                    past_length,
                     encoder_ffn_dim,
                 ),
             )
@@ -92,13 +90,13 @@ def get_inputs(
                 torch.randn(
                     batch_size,
                     decoder_attention_heads,
-                    sequence_length2,
+                    past_length2,
                     decoder_ffn_dim,
                 ),
                 torch.randn(
                     batch_size,
                     decoder_attention_heads,
-                    sequence_length2,
+                    past_length2,
                     decoder_ffn_dim,
                 ),
             )
@@ -124,7 +122,8 @@ def get_inputs(
         batch_size=batch_size + 1,
         sequence_length=sequence_length + add_second_input,
         dummy_max_token_id=dummy_max_token_id,
-        sequence_length2=sequence_length2,
+        past_length=past_length,
+        past_length2=past_length2,
         decoder_attention_heads=decoder_attention_heads,
         encoder_attention_heads=encoder_attention_heads,
         encoder_ffn_dim=encoder_ffn_dim,
@@ -146,7 +145,9 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
     check_hasattr(config, "vocab_size")
     kwargs = dict(
         batch_size=2,
-        sequence_length=30,
+        sequence_length=12,
+        past_length=30,
+        past_length2=4,
         dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
     )
     for att in [
```
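With the rename, `sequence_length` now counts only the tokens fed to the current call (12 by default) while `past_length` and `past_length2` size the self- and cross-attention caches (30 and 4), matching the shapes recorded in the new test's docstring. A sketch of inspecting the updated defaults, assuming a transformers install to load the Bart config:

```python
from transformers import AutoConfig
from onnx_diagnostic.tasks.feature_extraction import random_input_kwargs

config = AutoConfig.from_pretrained("facebook/bart-base")
kwargs, get_inputs_fn = random_input_kwargs(config)
# Expected keys after this commit: batch_size=2, sequence_length=12,
# past_length=30, past_length2=4, dummy_max_token_id=vocab_size - 1, ...
print(kwargs)
```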
