
reduce multi head attention test runtime
irenaby committed Jan 8, 2025
1 parent 72cadff commit 1254e6f
Showing 1 changed file with 17 additions and 0 deletions.
@@ -22,6 +22,7 @@
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
 from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
 from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
+from tests.common_tests.helpers.generate_test_tp_model import generate_test_tp_model
 from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest
 
 """
@@ -55,6 +56,22 @@ def create_inputs_shape(self):
                 [self.val_batch_size] + list(self.key_input_shape),
                 [self.val_batch_size] + list(self.value_input_shape)]
 
+    def get_tpc(self):
+        tpc = {
+            'no_quantization': generate_test_tp_model({
+                'weights_n_bits': 32,
+                'activation_n_bits': 32,
+                'enable_weights_quantization': False,
+                'enable_activation_quantization': False
+            })
+        }
+        if self.num_heads < 5:
+            tpc['all_4bit'] = generate_test_tp_model({'weights_n_bits': 4,
+                                                      'activation_n_bits': 4,
+                                                      'enable_weights_quantization': True,
+                                                      'enable_activation_quantization': True})
+        return tpc
+

 class MHANet(nn.Module):
     # This network is based on a single MHA layer
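The runtime saving comes from the new get_tpc override: the float 'no_quantization' configuration is always returned, while the heavier 'all_4bit' configuration is added only when num_heads < 5, so large multi-head variants skip the 4-bit quantization run entirely. Below is a minimal sketch of how a dict of named TPC configurations like this might drive a per-configuration test loop; the run_all_tpc_variants function and the run_test entry point are assumptions for illustration, not the actual MCT base-test API.

# Hypothetical sketch of a per-configuration test loop (function and method
# names here are assumptions, not the real MCT test-harness API).
def run_all_tpc_variants(test_case):
    for tpc_name, tpc in test_case.get_tpc().items():
        # Each entry is an independent quantization setting; when num_heads >= 5
        # the dict simply has no 'all_4bit' entry, so that run never happens.
        print(f"running {type(test_case).__name__} with tpc '{tpc_name}'")
        test_case.run_test(tpc)  # hypothetical per-config entry point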
