From 1e20fbb9851cb2f14269d7097619accd23334238 Mon Sep 17 00:00:00 2001
From: Ofir Gordon
Date: Tue, 12 Mar 2024 16:22:10 +0200
Subject: [PATCH] Fix target KPI usage in tutorials

---
 .../mixed_precision_quantization_config.py            | 11 +++++++++++
 .../example_keras_mobilenet_gptq_mixed_precision.py   |  4 ++--
 .../keras/ptq/example_keras_effdet_lite0.ipynb        |  4 ++--
 .../ptq/example_keras_mobilenet_mixed_precision.ipynb |  4 ++--
 .../ptq/example_keras_mobilenet_mixed_precision.py    |  2 +-
 .../example_keras_mobilenet_mixed_precision_lut.py    |  2 +-
 .../notebooks/keras/ptq/example_keras_yolov8n.ipynb   |  2 +-
 .../keras/ptq/keras_yolov8n_for_imx500.ipynb          |  2 +-
 .../ptq/example_pytorch_mobilenet_mixed_precision.py  |  2 +-
 .../example_pytorch_mobilenet_mixed_precision_lut.py  |  2 +-
 ...pytorch_mobilenetv2_cifar100_mixed_precision.ipynb |  4 ++--
 ...le_pytorch_mobilenetv2_cifar100_mixed_precision.py |  2 +-
 tutorials/quick_start/keras_fw/quant.py               |  4 +---
 tutorials/quick_start/pytorch_fw/quant.py             |  4 +---
 14 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py
index c1f557fa0..c44c5a570 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py
@@ -66,3 +66,14 @@ def __init__(self,
         self.norm_scores = norm_scores
 
         self.metric_normalization_threshold = metric_normalization_threshold
+
+    def set_target_kpi(self, target_kpi: KPI):
+        """
+        Setting target KPI in mixed precision config.
+
+        Args:
+            target_kpi: A target KPI to set.
+
+        """
+
+        self.target_kpi = target_kpi
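The eleven added lines above are the entire API change: a target KPI is now attached to the
mixed-precision configuration with set_target_kpi instead of being passed as a separate
target_kpi argument to each quantization call, and every tutorial edit below is that same
substitution. A minimal end-to-end sketch of the new flow, assuming the Keras tutorial setup;
the MobileNetV2 model, the random-input representative dataset, and the
'tensorflow'/'default' TPC lookup are illustrative stand-ins rather than part of this patch:

    import numpy as np
    import model_compression_toolkit as mct
    from keras.applications.mobilenet_v2 import MobileNetV2

    float_model = MobileNetV2()  # stand-in for the tutorial's float model

    def representative_dataset_gen():
        # Stand-in calibration data; the tutorials iterate over real image batches.
        for _ in range(2):
            yield [np.random.rand(1, 224, 224, 3).astype(np.float32)]

    tpc = mct.get_target_platform_capabilities('tensorflow', 'default')

    # Build a mixed-precision core config and a weights-memory budget (in bytes).
    mp_config = mct.core.MixedPrecisionQuantizationConfig()
    core_config = mct.core.CoreConfig(mixed_precision_config=mp_config)
    kpi = mct.core.KPI(weights_memory=2674291)

    # New in this patch: the KPI is set on the config...
    core_config.mixed_precision_config.set_target_kpi(kpi)

    # ...so the PTQ call no longer receives a target_kpi argument.
    quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(
        float_model,
        representative_dataset_gen,
        core_config=core_config,
        target_platform_capabilities=tpc)

Note that the effdet notebook below instead passes target_kpi=kpi to the
MixedPrecisionQuantizationConfig constructor, a second route to the same target_kpi
attribute that the setter above assigns.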
diff --git a/tutorials/notebooks/keras/gptq/example_keras_mobilenet_gptq_mixed_precision.py b/tutorials/notebooks/keras/gptq/example_keras_mobilenet_gptq_mixed_precision.py
index 01df81a5f..fea0ecece 100644
--- a/tutorials/notebooks/keras/gptq/example_keras_mobilenet_gptq_mixed_precision.py
+++ b/tutorials/notebooks/keras/gptq/example_keras_mobilenet_gptq_mixed_precision.py
@@ -137,6 +137,7 @@ def representative_data_gen() -> list:
     # examples:
     # weights_compression_ratio = 0.75 - About 0.75 of the model's weights memory size when quantized with 8 bits.
     kpi = mct.core.KPI(kpi_data.weights_memory * args.weights_compression_ratio)
+    config.mixed_precision_config.set_target_kpi(kpi)
 
     # Create a GPTQ quantization configuration and set the number of training iterations.
     gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=args.num_gptq_training_iterations,
@@ -146,8 +147,7 @@ def representative_data_gen() -> list:
                                                                    representative_data_gen,
                                                                    gptq_config=gptq_config,
                                                                    core_config=config,
-                                                                   target_platform_capabilities=target_platform_cap,
-                                                                   target_kpi=kpi)
+                                                                   target_platform_capabilities=target_platform_cap)
 
     # Export quantized model to TFLite and Keras.
     # For more details please see: https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/exporter/README.md
diff --git a/tutorials/notebooks/keras/ptq/example_keras_effdet_lite0.ipynb b/tutorials/notebooks/keras/ptq/example_keras_effdet_lite0.ipynb
index 20b269a39..ce9a52a6d 100644
--- a/tutorials/notebooks/keras/ptq/example_keras_effdet_lite0.ipynb
+++ b/tutorials/notebooks/keras/ptq/example_keras_effdet_lite0.ipynb
@@ -387,12 +387,12 @@
     "# set weights memory size, so the quantized model will fit the IMX500 memory\n",
     "kpi = mct.core.KPI(weights_memory=2674291)\n",
     "# set MixedPrecision configuration for compressing the weights\n",
-    "mp_config = mct.core.MixedPrecisionQuantizationConfig(use_hessian_based_scores=False)\n",
+    "mp_config = mct.core.MixedPrecisionQuantizationConfig(use_hessian_based_scores=False,\n",
+    "                                                      target_kpi=kpi)\n",
     "core_config = mct.core.CoreConfig(mixed_precision_config=mp_config)\n",
     "quant_model, _ = mct.ptq.keras_post_training_quantization(\n",
     "    model,\n",
     "    get_representative_dataset(20),\n",
-    "    target_kpi=kpi,\n",
     "    core_config=core_config,\n",
     "    target_platform_capabilities=tpc)"
   ],
diff --git a/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.ipynb b/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.ipynb
index 3e67bc606..d5a0854b8 100644
--- a/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.ipynb
+++ b/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.ipynb
@@ -266,7 +266,8 @@
     "# while the bias will not)\n",
     "# examples:\n",
     "weights_compression_ratio = 0.75 # About 0.75 of the model's weights memory size when quantized with 8 bits.\n",
-    "kpi = mct.core.KPI(kpi_data.weights_memory * weights_compression_ratio)"
+    "kpi = mct.core.KPI(kpi_data.weights_memory * weights_compression_ratio)\n",
+    "core_config.mixed_precision_config.set_target_kpi(kpi)"
   ],
   "metadata": {
    "collapsed": false
@@ -296,7 +297,6 @@
     "quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(\n",
     "    float_model,\n",
     "    representative_dataset_gen,\n",
-    "    target_kpi=kpi,\n",
     "    core_config=core_config,\n",
     "    target_platform_capabilities=tpc)"
   ]
diff --git a/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.py b/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.py
index f9e0f2ca1..84e2dcbfe 100644
--- a/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.py
+++ b/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision.py
@@ -130,12 +130,12 @@ def representative_data_gen() -> list:
     # examples:
     # weights_compression_ratio = 0.75 - About 0.75 of the model's weights memory size when quantized with 8 bits.
     kpi = mct.core.KPI(kpi_data.weights_memory * args.weights_compression_ratio)
+    configuration.mixed_precision_config.set_target_kpi(kpi)
 
     # It is also possible to constraint only part of the KPI metric, e.g., by providing only weights_memory target
     # in the past KPI object, e.g., kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)
 
     quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(model,
                                                                                   representative_data_gen,
-                                                                                  target_kpi=kpi,
                                                                                   core_config=configuration,
                                                                                   target_platform_capabilities=target_platform_cap)
diff --git a/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision_lut.py b/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision_lut.py
index bd9f1c790..281891338 100644
--- a/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision_lut.py
+++ b/tutorials/notebooks/keras/ptq/example_keras_mobilenet_mixed_precision_lut.py
@@ -136,10 +136,10 @@ def representative_data_gen() -> list:
     # weights_compression_ratio = 0.4 - About 0.4 of the model's weights memory size when quantized with 8 bits.
     kpi = mct.core.KPI(kpi_data.weights_memory * args.weights_compression_ratio)
     # Note that in this example, activations are quantized with fixed bit-width (non mixed-precision) of 8-bit.
+    configuration.mixed_precision_config.set_target_kpi(kpi)
 
     quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(model,
                                                                                   representative_data_gen,
-                                                                                  target_kpi=kpi,
                                                                                   core_config=configuration,
                                                                                   target_platform_capabilities=target_platform_cap)
diff --git a/tutorials/notebooks/keras/ptq/example_keras_yolov8n.ipynb b/tutorials/notebooks/keras/ptq/example_keras_yolov8n.ipynb
index f32db0e74..3cc4412bb 100644
--- a/tutorials/notebooks/keras/ptq/example_keras_yolov8n.ipynb
+++ b/tutorials/notebooks/keras/ptq/example_keras_yolov8n.ipynb
@@ -297,11 +297,11 @@
     "                                          config,\n",
     "                                          target_platform_capabilities=tpc)\n",
     "kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)\n",
+    "config.mixed_precision_config.set_target_kpi(kpi)\n",
     "\n",
     "# Perform post training quantization\n",
     "quant_model, _ = mct.ptq.keras_post_training_quantization(model,\n",
     "                                                          representative_dataset_gen,\n",
-    "                                                          target_kpi=kpi,\n",
     "                                                          core_config=config,\n",
     "                                                          target_platform_capabilities=tpc)\n",
     "print('Quantized model is ready')"
diff --git a/tutorials/notebooks/keras/ptq/keras_yolov8n_for_imx500.ipynb b/tutorials/notebooks/keras/ptq/keras_yolov8n_for_imx500.ipynb
index f0fb703a6..9ef8a4a04 100644
--- a/tutorials/notebooks/keras/ptq/keras_yolov8n_for_imx500.ipynb
+++ b/tutorials/notebooks/keras/ptq/keras_yolov8n_for_imx500.ipynb
@@ -193,11 +193,11 @@
     "                                          config,\n",
     "                                          target_platform_capabilities=tpc)\n",
     "kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)\n",
+    "config.mixed_precision_config.set_target_kpi(kpi)\n",
     "\n",
     "# Perform post training quantization\n",
     "quant_model, _ = mct.ptq.keras_post_training_quantization(model,\n",
     "                                                          representative_dataset_gen,\n",
-    "                                                          target_kpi=kpi,\n",
     "                                                          core_config=config,\n",
     "                                                          target_platform_capabilities=tpc)\n",
     "print('Quantized model is ready')"
diff --git a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenet_mixed_precision.py b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenet_mixed_precision.py
index 3ce77c6f3..63113dc8b 100644
--- a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenet_mixed_precision.py
+++ b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenet_mixed_precision.py
@@ -123,13 +123,13 @@ def representative_data_gen() -> list:
     # examples:
     # weights_compression_ratio = 0.75 - About 0.75 of the model's weights memory size when quantized with 8 bits.
     kpi = mct.core.KPI(kpi_data.weights_memory * args.weights_compression_ratio)
+    configuration.mixed_precision_config.set_target_kpi(kpi)
 
     # It is also possible to constraint only part of the KPI metric, e.g., by providing only weights_memory target
     # in the past KPI object, e.g., kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)
 
     quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization(model,
                                                                                     representative_data_gen,
-                                                                                    target_kpi=kpi,
                                                                                     core_config=configuration,
                                                                                     target_platform_capabilities=target_platform_cap)
diff --git a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenet_mixed_precision_lut.py b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenet_mixed_precision_lut.py
index 66e5077c7..4b0b9b533 100644
--- a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenet_mixed_precision_lut.py
+++ b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenet_mixed_precision_lut.py
@@ -128,10 +128,10 @@ def representative_data_gen() -> list:
     # weights_compression_ratio = 0.4 - About 0.4 of the model's weights memory size when quantized with 8 bits.
     kpi = mct.core.KPI(kpi_data.weights_memory * args.weights_compression_ratio)
     # Note that in this example, activations are quantized with fixed bit-width (non mixed-precision) of 8-bit.
+    configuration.mixed_precision_config.set_target_kpi(kpi)
 
     quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization(model,
                                                                                     representative_data_gen,
-                                                                                    target_kpi=kpi,
                                                                                     core_config=configuration,
                                                                                     target_platform_capabilities=target_platform_cap)
diff --git a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb
index 6e967f7fd..0a4ca7abc 100644
--- a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb
+++ b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb
@@ -515,7 +515,8 @@
     "# while the bias will not)\n",
     "# examples:\n",
     "# weights_compression_ratio = 0.75 - About 0.75 of the model's weights memory size when quantized with 8 bits.\n",
-    "kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)"
+    "kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)\n",
+    "configuration.mixed_precision_config.set_target_kpi(kpi)"
   ]
  },
  {
@@ -537,7 +538,6 @@
    "source": [
     "quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization(model,\n",
     "                                                                                 representative_data_gen,\n",
-    "                                                                                 target_kpi=kpi,\n",
     "                                                                                 core_config=configuration,\n",
     "                                                                                 target_platform_capabilities=target_platform_cap)\n",
     "    "
diff --git a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.py b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.py
index d39208562..59bea1c58 100644
--- a/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.py
+++ b/tutorials/notebooks/pytorch/ptq/example_pytorch_mobilenetv2_cifar100_mixed_precision.py
@@ -241,12 +241,12 @@ def representative_data_gen() -> list:
     # examples:
     # weights_compression_ratio = 0.75 - About 0.75 of the model's weights memory size when quantized with 8 bits.
     kpi = mct.core.KPI(kpi_data.weights_memory * args.weights_compression_ratio)
+    configuration.mixed_precision_config.set_target_kpi(kpi)
 
     # It is also possible to constraint only part of the KPI metric, e.g., by providing only weights_memory target
     # in the past KPI object, e.g., kpi = mct.core.KPI(kpi_data.weights_memory * 0.75)
 
     quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization(model,
                                                                                     representative_data_gen,
-                                                                                    target_kpi=kpi,
                                                                                     core_config=configuration,
                                                                                     target_platform_capabilities=target_platform_cap)
 
     # Finally, we evaluate the quantized model:
diff --git a/tutorials/quick_start/keras_fw/quant.py b/tutorials/quick_start/keras_fw/quant.py
index d26f4e1c2..69e847f29 100644
--- a/tutorials/quick_start/keras_fw/quant.py
+++ b/tutorials/quick_start/keras_fw/quant.py
@@ -100,10 +100,10 @@ def quantize(model: tf.keras.Model,
                                        shift_negative_activation_correction=True),
                                    mixed_precision_config=mp_conf)
         target_kpi = get_target_kpi(model, mp_wcr, representative_data_gen, core_conf, tpc)
+        core_conf.mixed_precision_config.set_target_kpi(target_kpi)
     else:
         core_conf = CoreConfig(quantization_config=mct.core.QuantizationConfig(
             shift_negative_activation_correction=True))
-        target_kpi = None
 
     # Quantize model
     if args.get('gptq', False):
@@ -118,7 +118,6 @@ def quantize(model: tf.keras.Model,
         quantized_model, quantization_info = \
             mct.gptq.keras_gradient_post_training_quantization(model,
                                                                representative_data_gen=representative_data_gen,
-                                                               target_kpi=target_kpi,
                                                                core_config=core_conf,
                                                                gptq_config=gptq_conf,
                                                                gptq_representative_data_gen=representative_data_gen,
@@ -130,7 +129,6 @@ def quantize(model: tf.keras.Model,
         quantized_model, quantization_info = \
             mct.ptq.keras_post_training_quantization(model,
                                                      representative_data_gen=representative_data_gen,
-                                                     target_kpi=target_kpi,
                                                      core_config=core_conf,
                                                      target_platform_capabilities=tpc)
 
diff --git a/tutorials/quick_start/pytorch_fw/quant.py b/tutorials/quick_start/pytorch_fw/quant.py
index 5d445779c..6f5738470 100644
--- a/tutorials/quick_start/pytorch_fw/quant.py
+++ b/tutorials/quick_start/pytorch_fw/quant.py
@@ -101,10 +101,10 @@ def quantize(model: nn.Module,
                                        shift_negative_activation_correction=True),
                                    mixed_precision_config=mp_conf)
         target_kpi = get_target_kpi(model, mp_wcr, representative_data_gen, core_conf, tpc)
+        core_conf.mixed_precision_config.set_target_kpi(target_kpi)
     else:
         core_conf = CoreConfig(quantization_config=mct.core.QuantizationConfig(
             shift_negative_activation_correction=True))
-        target_kpi = None
 
     # Quantize model
     if args.get('gptq', False):
@@ -119,7 +119,6 @@ def quantize(model: nn.Module,
         quantized_model, quantization_info = \
             mct.gptq.pytorch_gradient_post_training_quantization(model,
                                                                  representative_data_gen=representative_data_gen,
-                                                                 target_kpi=target_kpi,
                                                                  core_config=core_conf,
                                                                  gptq_config=gptq_conf,
                                                                  gptq_representative_data_gen=representative_data_gen,
@@ -131,7 +130,6 @@ def quantize(model: nn.Module,
         quantized_model, quantization_info = \
             mct.ptq.pytorch_post_training_quantization(model,
                                                        representative_data_gen=representative_data_gen,
-                                                       target_kpi=target_kpi,
                                                        core_config=core_conf,
                                                        target_platform_capabilities=tpc)
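Condensed, the two quick_start files now follow the same rule as the tutorial scripts: a
target KPI exists only when mixed precision is requested, and it lives on the core config
instead of being threaded (possibly as None) through the quantization call. The PyTorch side
after this patch reads roughly as follows; get_target_kpi, mp_conf, mp_wcr, tpc and the
representative dataset are defined earlier in that file:

    if mp_wcr:  # mixed precision requested via a weights compression ratio
        core_conf = CoreConfig(quantization_config=mct.core.QuantizationConfig(
                                   shift_negative_activation_correction=True),
                               mixed_precision_config=mp_conf)
        target_kpi = get_target_kpi(model, mp_wcr, representative_data_gen, core_conf, tpc)
        core_conf.mixed_precision_config.set_target_kpi(target_kpi)
    else:
        core_conf = CoreConfig(quantization_config=mct.core.QuantizationConfig(
            shift_negative_activation_correction=True))

    # target_kpi is no longer passed here; in the non-mixed-precision branch it is never created.
    quantized_model, quantization_info = \
        mct.ptq.pytorch_post_training_quantization(model,
                                                   representative_data_gen=representative_data_gen,
                                                   core_config=core_conf,
                                                   target_platform_capabilities=tpc)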