Support weight quantization for tf.gather in TPC imx500.v4.
elad-c committed Sep 19, 2024
1 parent bb35a81 commit 3fe2140
Showing 4 changed files with 16 additions and 4 deletions.
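
For context — a minimal sketch (not part of the commit) of the pattern this change targets: tf.gather indexing into a constant float table, mirroring the test networks in the diff below. Under the imx500 v4 TPC this op now falls under "DimensionManipulationOpsWithWeights", so the constant table is quantized as a weight while the op stays quantization-preserving for its activation input. Shapes and values here are illustrative only.

import numpy as np
import tensorflow as tf

# Toy Keras model: gather from a constant float32 table with indices derived from the input.
inputs = tf.keras.layers.Input(shape=(32, 32, 16))
table = np.random.random((100,)).astype(np.float32)  # constant table, now eligible for weight quantization
inds = tf.reshape(tf.argmax(tf.reshape(inputs, (-1, 32 * 32, 16)), axis=1), (-1, 1, 1, 16))
x = tf.add(inputs, tf.gather(table, inds))  # tf.gather on a constant, as exercised by the updated tests
model = tf.keras.models.Model(inputs=inputs, outputs=x)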

@@ -189,6 +189,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
const_config_input16_per_tensor],
base_config=const_config_input16_per_tensor)

+ qpreserving_const_config = const_config.clone_and_edit(enable_activation_quantization=False,
+ quantization_preserving=True)
+ qpreserving_const_config_options = tp.QuantizationConfigOptions([qpreserving_const_config])

# Create a TargetPlatformModel and set its default quantization config.
# This default configuration will be used for all operations
# unless specified otherwise (see OperatorsSet, for example):
@@ -214,6 +218,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
default_qco.clone_and_edit(enable_activation_quantization=False,
quantization_preserving=True)
.clone_and_edit_weight_attribute(enable_weights_quantization=False))
+ tp.OperatorsSet("DimensionManipulationOpsWithWeights", qpreserving_const_config_options)
tp.OperatorsSet("DimensionManipulationOps",
default_qco.clone_and_edit(enable_activation_quantization=False,
quantization_preserving=True,

@@ -74,10 +74,8 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
Dropout,
MaxPooling2D,
tf.split,
- tf.gather,
tf.cast,
tf.unstack,
- tf.compat.v1.gather,
tf.__operators__.getitem,
tf.strided_slice]
quantization_preserving_list_16bit_input = [Reshape,
@@ -93,6 +91,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
tp.OperationsSetToLayers("NoQuantization", no_quant_list)
tp.OperationsSetToLayers("QuantizationPreserving", quantization_preserving)
tp.OperationsSetToLayers("DimensionManipulationOps", quantization_preserving_list_16bit_input)
+ tp.OperationsSetToLayers("DimensionManipulationOpsWithWeights", [tf.gather, tf.compat.v1.gather])
tp.OperationsSetToLayers("MergeOps", [tf.stack, tf.concat, Concatenate])
tp.OperationsSetToLayers("Conv",
[Conv2D,

@@ -189,6 +189,11 @@ def create_networks(self):
x3 = tf.add_n([x1, as_const(x), x2])
x1 = tf.reshape(tf.stack([as_const(x1), x1, as_const(x1)], axis=1), (-1, 3*x1.shape[1], x1.shape[2], x1.shape[3]))
x = tf.concat([x1, x2, as_const(x3), x3], 1)

+ inds = tf.reshape(tf.argmax(tf.reshape(x, (-1, 192 * 32, 38)), axis=1), (-1, 1, 1, 38))
+ b = tf.gather(np.random.random((100,)).astype(np.float32), inds)
+ x = tf.add(x, b)

return tf.keras.models.Model(inputs=inputs, outputs=x)

def compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
@@ -199,7 +204,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info=
self.unit_test.assertTrue(np.isclose(cs, 1, atol=1e-2), msg=f'fail cosine similarity check:{cs}')

# check quantization layers:
- for op in [tf.concat, tf.stack, layers.Add, layers.Multiply, layers.Concatenate]:
+ for op in [tf.concat, tf.stack, layers.Add, layers.Multiply, layers.Concatenate, tf.gather, tf.compat.v1.gather]:
for qlayer in get_layers_from_model_by_type(quantized_model, op):
self.unit_test.assertTrue(isinstance(qlayer, KerasQuantizationWrapper),
msg=f"{op} should be quantized.")

@@ -155,7 +155,10 @@ def get_tpc(self):
def create_networks(self):
as_const = lambda v: np.random.random(v.shape.as_list()).astype(np.float32)
inputs = layers.Input(shape=self.get_input_shapes()[0][1:])
- x = layers.Concatenate()([inputs, np.random.random((1, 32, 32, 3)), inputs, np.random.random((1, 32, 32, 3))])
+ inds = tf.reshape(tf.argmax(tf.reshape(inputs, (-1, 32 * 32, 16)), axis=1), (-1, 1, 1, 16))
+ b = tf.gather(np.random.random((100,)).astype(np.float32), inds)
+ x = tf.add(inputs, b)
+ x = layers.Concatenate()([x, np.random.random((1, 32, 32, 3)), x, np.random.random((1, 32, 32, 3))])
x1 = layers.Add()([np.random.random((1, x.shape[-1])), x, np.random.random((1, x.shape[-1]))])
x2 = layers.Multiply()([x, np.random.random((1, x.shape[-1])), x, np.random.random((1, x.shape[-1]))])
x3 = tf.add_n([x1, as_const(x), x2])
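A hedged usage sketch (not part of the commit) showing how such a model would be quantized against the imx500 v4 TPC. The entry points mct.get_target_platform_capabilities and mct.ptq.keras_post_training_quantization are assumed from MCT's public Keras API; verify them against the installed MCT version.

import numpy as np
import model_compression_toolkit as mct

# `model` is a Keras model containing tf.gather on a constant table,
# e.g. the toy model sketched after the commit header above.
def representative_data_gen():
    yield [np.random.random((1, 32, 32, 16)).astype(np.float32)]

tpc = mct.get_target_platform_capabilities('tensorflow', 'imx500', target_platform_version='v4')
quantized_model, quant_info = mct.ptq.keras_post_training_quantization(
    model, representative_data_gen, target_platform_capabilities=tpc)

If the change works as intended, the wrapped tf.gather layer in quantized_model carries its constant table as a quantized weight; the updated tests above assert exactly this by checking that tf.gather and tf.compat.v1.gather layers are wrapped in KerasQuantizationWrapper.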
