Fix erroneous quantization operations in TFLite model (#39)

kahrendt · Dec 21, 2024 · ac6502b
1 parent cdbf64e
commit ac6502b
Showing 1 changed file with 5 additions and 0 deletions.
diff --git a/microwakeword/utils.py b/microwakeword/utils.py
@@ -327,6 +327,11 @@ def representative_dataset_gen():
     converter = tf.lite.TFLiteConverter.from_saved_model(path_to_model)
     converter.optimizations = {tf.lite.Optimize.DEFAULT}
 
+    # Without this flag, the Streaming layer `state` variables are left as float32,
+    # resulting in Quantize and Dequantize operations before and after every `ReadVariable`
+    # and `AssignVariable` operation.
+    converter._experimental_variable_quantization = True
+
     if quantize:
         converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8}
         converter.inference_input_type = tf.int8