From eb847372a6214d40fe864323ab133c391497f3f2 Mon Sep 17 00:00:00 2001 From: Reuven <44209964+reuvenperetz@users.noreply.github.com> Date: Mon, 26 Feb 2024 12:02:57 +0200 Subject: [PATCH] Remove legacy package (#952) --------- Co-authored-by: reuvenp --- .github/labeler.yml | 3 - model_compression_toolkit/__init__.py | 4 - model_compression_toolkit/legacy/__init__.py | 14 - .../legacy/keras_quantization_facade.py | 304 ------------------ .../legacy/pytorch_quantization_facade.py | 284 ---------------- .../feature_networks/old_api_test.py | 88 ----- .../test_features_runner.py | 7 - .../feature_models/old_api_test.py | 122 ------- .../model_tests/test_feature_models_runner.py | 7 - 9 files changed, 833 deletions(-) delete mode 100644 model_compression_toolkit/legacy/__init__.py delete mode 100644 model_compression_toolkit/legacy/keras_quantization_facade.py delete mode 100644 model_compression_toolkit/legacy/pytorch_quantization_facade.py delete mode 100644 tests/keras_tests/feature_networks_tests/feature_networks/old_api_test.py delete mode 100644 tests/pytorch_tests/model_tests/feature_models/old_api_test.py diff --git a/.github/labeler.yml b/.github/labeler.yml index e20759bf4..ce0db3416 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -20,9 +20,6 @@ auto:exporter: auto:gptq: - model_compression_toolkit/gptq/** -auto:legacy: -- model_compression_toolkit/legacy/** - auto:ptq: - model_compression_toolkit/ptq/** diff --git a/model_compression_toolkit/__init__.py b/model_compression_toolkit/__init__.py index 371f8a38a..d30ec2da2 100644 --- a/model_compression_toolkit/__init__.py +++ b/model_compression_toolkit/__init__.py @@ -18,8 +18,6 @@ from model_compression_toolkit.target_platform_capabilities.tpc_models.get_target_platform_capabilities import get_target_platform_capabilities from model_compression_toolkit import core from model_compression_toolkit.logger import set_log_folder -from model_compression_toolkit.legacy.keras_quantization_facade import keras_post_training_quantization, keras_post_training_quantization_mixed_precision -from model_compression_toolkit.legacy.pytorch_quantization_facade import pytorch_post_training_quantization, pytorch_post_training_quantization_mixed_precision from model_compression_toolkit import trainable_infrastructure from model_compression_toolkit import ptq from model_compression_toolkit import qat @@ -41,8 +39,6 @@ from model_compression_toolkit.logger import set_log_folder from model_compression_toolkit.core.common.data_loader import FolderImageLoader from model_compression_toolkit.core.common.framework_info import FrameworkInfo, ChannelAxis -from model_compression_toolkit.legacy.keras_quantization_facade import keras_post_training_quantization, keras_post_training_quantization_mixed_precision -from model_compression_toolkit.legacy.pytorch_quantization_facade import pytorch_post_training_quantization, pytorch_post_training_quantization_mixed_precision from model_compression_toolkit.core.keras.kpi_data_facade import keras_kpi_data from model_compression_toolkit.core.pytorch.kpi_data_facade import pytorch_kpi_data from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig diff --git a/model_compression_toolkit/legacy/__init__.py b/model_compression_toolkit/legacy/__init__.py deleted file mode 100644 index 2147ec284..000000000 --- a/model_compression_toolkit/legacy/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/model_compression_toolkit/legacy/keras_quantization_facade.py b/model_compression_toolkit/legacy/keras_quantization_facade.py deleted file mode 100644 index 57a8c1532..000000000 --- a/model_compression_toolkit/legacy/keras_quantization_facade.py +++ /dev/null @@ -1,304 +0,0 @@ -# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from typing import Callable, List, Tuple - -from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer -from model_compression_toolkit.logger import Logger -from model_compression_toolkit.constants import TENSORFLOW -from model_compression_toolkit.core.common.user_info import UserInformation -from model_compression_toolkit.gptq import GradientPTQConfig, GradientPTQConfigV2 -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI -from model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.common.network_editors.actions import EditRule -from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ - MixedPrecisionQuantizationConfig, DEFAULT_MIXEDPRECISION_CONFIG -from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig -from model_compression_toolkit.core.common.quantization.core_config import CoreConfig -from model_compression_toolkit.core.common.quantization.debug_config import DebugConfig -from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG -from model_compression_toolkit.core.runner import core_runner -from model_compression_toolkit.gptq.runner import gptq_runner -from model_compression_toolkit.ptq.runner import ptq_runner -from model_compression_toolkit.core.exporter import export_model -from model_compression_toolkit.core.analyzer import analyzer_model_quantization - -from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities -from model_compression_toolkit.constants import FOUND_TF - -if FOUND_TF: - from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO - from 
model_compression_toolkit.core.keras.keras_implementation import KerasImplementation - from model_compression_toolkit.core.keras.keras_model_validation import KerasModelValidation - from tensorflow.keras.models import Model - from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL - - from model_compression_toolkit import get_target_platform_capabilities - - DEFAULT_KERAS_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL) - - - def keras_post_training_quantization(in_model: Model, - representative_data_gen: Callable, - n_iter: int = 500, - quant_config: QuantizationConfig = DEFAULTCONFIG, - fw_info: FrameworkInfo = DEFAULT_KERAS_INFO, - network_editor: List[EditRule] = [], - gptq_config: GradientPTQConfig = None, - analyze_similarity: bool = False, - target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> \ - Tuple[Model, UserInformation]: - """ - Quantize a pretrained Keras model using post-training quantization. By default, the model is quantized - using a symmetric constraint quantization thresholds (power of two) as defined in the default TargetPlatformCapabilities. - The model is first optimized using several transformations (e.g. BatchNormalization folding to - preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are - being collected for each layer's output (and input, depends on the quantization configuration). - Thresholds are then being calculated using the collected statistics and the model is quantized - (both coefficients and activations by default). - If a gptq_config is passed, the quantized weights are optimized using gradient based post - training quantization by comparing points between the float and quantized models, and minimizing the observed - loss. - - Args: - in_model (Model): Keras model to quantize. - representative_data_gen (Callable): Dataset used for calibration. - n_iter (int): Number of calibration iterations to run. - quant_config (QuantizationConfig): QuantizationConfig containing parameters of how the model should be quantized. `Default configuration. `_ - fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). `Default Keras info `_ - network_editor (List[EditRule]): List of EditRules. Each EditRule consists of a node filter and an action to change quantization settings of the filtered nodes. - gptq_config (GradientPTQConfig): Configuration for using gptq (e.g. optimizer). - analyze_similarity (bool): Whether to plot similarity figures within TensorBoard (when logger is enabled) or not. - target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to. - - Returns: - A quantized model and information the user may need to handle the quantized model. 
- - Examples: - - Import a Keras model: - - >>> from tensorflow.keras.applications.mobilenet import MobileNet - >>> model = MobileNet() - - Create a random dataset generator: - - >>> import numpy as np - >>> def repr_datagen(): return [np.random.random((1,224,224,3))] - - Import mct and pass the model with the representative dataset generator to get a quantized model: - - >>> import model_compression_toolkit as mct - >>> quantized_model, quantization_info = mct.keras_post_training_quantization(model, repr_datagen, n_iter=1) - - """ - Logger.warning('keras_post_training_quantization is deprecated and will be removed ' - 'in the future. Please use mct.ptq.keras_post_training_quantization_experimental instead.') - - KerasModelValidation(model=in_model, - fw_info=fw_info).validate() - - core_config = CoreConfig(quantization_config=quant_config, - debug_config=DebugConfig(analyze_similarity=analyze_similarity, - network_editor=network_editor) - ) - - tb_w = init_tensorboard_writer(fw_info) - - fw_impl = KerasImplementation() - - # convert old representative dataset generation to a generator - def _representative_data_gen(): - for _ in range(n_iter): - yield representative_data_gen() - - # Ignore initialized hessian service as it is not used here - tg, bit_widths_config, _ = core_runner(in_model=in_model, - representative_data_gen=_representative_data_gen, - core_config=core_config, - fw_info=fw_info, - fw_impl=fw_impl, - tpc=target_platform_capabilities, - tb_w=tb_w) - - if gptq_config is None: - tg = ptq_runner(tg, _representative_data_gen, core_config, fw_info, fw_impl, tb_w) - else: - gptq_config_v2 = GradientPTQConfigV2.from_v1(n_iter, gptq_config) - tg = gptq_runner(tg, core_config, gptq_config_v2, _representative_data_gen, _representative_data_gen, - fw_info, fw_impl, tb_w) - - if core_config.debug_config.analyze_similarity: - analyzer_model_quantization(_representative_data_gen, tb_w, tg, fw_impl, fw_info) - - quantized_model, user_info = export_model(tg, fw_info, fw_impl, tb_w, bit_widths_config) - - return quantized_model, user_info - - - def keras_post_training_quantization_mixed_precision(in_model: Model, - representative_data_gen: Callable, - target_kpi: KPI, - n_iter: int = 500, - quant_config: MixedPrecisionQuantizationConfig = DEFAULT_MIXEDPRECISION_CONFIG, - fw_info: FrameworkInfo = DEFAULT_KERAS_INFO, - network_editor: List[EditRule] = [], - gptq_config: GradientPTQConfig = None, - analyze_similarity: bool = False, - target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> \ - Tuple[Model, UserInformation]: - """ - Quantize a pretrained Keras model using post-training quantization. By default, the model is quantized - using a symmetric constraint quantization thresholds (power of two) as defined in the default - TargetPlatformCapabilities. - The model is first optimized using several transformations (e.g. BatchNormalization folding to - preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are - being collected for each layer's output (and input, depends on the quantization configuration). - For each possible bit width (per operator, as defined in the TargetPlatformCapabilities) a - threshold is then being calculated using the collected statistics. - Then, using an ILP solver we find a mixed-precision configuration, and set a bit width - for each quantizer (for both activations and weights quantizers, by default). 
- In order to limit the maximal model's size, a target KPI need to be passed after weights_memory - or activation_memory (or both) is set (in bytes). - The model is then quantized (both coefficients and activations by default). - If gptq_config is passed, the quantized weights are optimized using gradient based post - training quantization by comparing points between the float and quantized models, and minimizing the - observed loss. - Notice that this feature is experimental. - - Args: - in_model (Model): Keras model to quantize. - representative_data_gen (Callable): Dataset used for calibration. - target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired. - n_iter (int): Number of calibration iterations to run. - quant_config (MixedPrecisionQuantizationConfig): QuantizationConfig containing parameters of how the model should be quantized. - fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). `Default Keras info `_ - network_editor (List[EditRule]): List of EditRules. Each EditRule consists of a node filter and an action to change quantization settings of the filtered nodes. - gptq_config (GradientPTQConfig): Configuration for using GPTQ (e.g. optimizer). - analyze_similarity (bool): Whether to plot similarity figures within TensorBoard (when logger is enabled) or not. - target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to. - - - Returns: - A quantized model and information the user may need to handle the quantized model. - - Examples: - - Import MCT: - - >>> import model_compression_toolkit as mct - - Import a Keras model: - - >>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 - >>> model = MobileNetV2() - - Create a random dataset generator: - - >>> import numpy as np - >>> def repr_datagen(): return [np.random.random((1,224,224,3))] - - Create a mixed-precision configuration, to quantize a model with different bitwidths for different layers. - The candidates bitwidth for quantization should be defined in the target platform model: - - >>> config = mct.core.MixedPrecisionQuantizationConfig() - - Create a KPI object to limit our returned model's size. Note that this value affects only coefficients - that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, - while the bias will not): - - >>> kpi = mct.core.KPI(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits. - - Pass the model, the representative dataset generator, the configuration and the target KPI to get a - quantized model: - - >>> quantized_model, quantization_info = mct.keras_post_training_quantization_mixed_precision(model,repr_datagen, target_kpi=kpi, n_iter=10, quant_config=config) - - For more configuration options, please take a look at our `API documentation `_. - - """ - Logger.warning('keras_post_training_quantization_mixed_precision is deprecated and will be removed ' - 'in the future. Please use mct.ptq.keras_post_training_quantization_experimental instead.') - - KerasModelValidation(model=in_model, - fw_info=fw_info).validate() - - if not isinstance(quant_config, MixedPrecisionQuantizationConfig): - Logger.error("Given quantization config to mixed-precision facade is not of type " - "MixedPrecisionQuantizationConfig. 
Please use keras_post_training_quantization API," - "or pass a valid mixed precision configuration.") - - Logger.info("Using experimental mixed-precision quantization. " - "If you encounter an issue please file a bug.") - - quantization_config, mp_config = quant_config.separate_configs() - core_config = CoreConfig(quantization_config=quantization_config, - mixed_precision_config=mp_config, - debug_config=DebugConfig(analyze_similarity=analyze_similarity, - network_editor=network_editor) - ) - - tb_w = init_tensorboard_writer(fw_info) - - fw_impl = KerasImplementation() - - # convert old representative dataset generation to a generator - def _representative_data_gen(): - for _ in range(n_iter): - yield representative_data_gen() - - # Ignore hessian info service since it is not used here - tg, bit_widths_config, _ = core_runner(in_model=in_model, - representative_data_gen=_representative_data_gen, - core_config=core_config, - fw_info=fw_info, - fw_impl=fw_impl, - tpc=target_platform_capabilities, - target_kpi=target_kpi, - tb_w=tb_w) - - if gptq_config is None: - tg = ptq_runner(tg, _representative_data_gen, core_config, fw_info, fw_impl, tb_w) - else: - gptq_config_v2 = GradientPTQConfigV2.from_v1(n_iter, gptq_config) - tg = gptq_runner(tg, - core_config, - gptq_config_v2, - _representative_data_gen, - _representative_data_gen, - fw_info, - fw_impl, - tb_w) - - if core_config.debug_config.analyze_similarity: - analyzer_model_quantization(_representative_data_gen, tb_w, tg, fw_impl, fw_info) - - quantized_model, user_info = export_model(tg, fw_info, fw_impl, tb_w, bit_widths_config) - - return quantized_model, user_info - -else: - # If tensorflow is not installed, - # we raise an exception when trying to use these functions. - def keras_post_training_quantization(*args, **kwargs): - Logger.critical('Installing tensorflow is mandatory ' - 'when using keras_post_training_quantization. ' - 'Could not find Tensorflow package.') # pragma: no cover - - - def keras_post_training_quantization_mixed_precision(*args, **kwargs): - Logger.critical('Installing tensorflow is mandatory ' - 'when using keras_post_training_quantization_mixed_precision. ' - 'Could not find Tensorflow package.') # pragma: no cover diff --git a/model_compression_toolkit/legacy/pytorch_quantization_facade.py b/model_compression_toolkit/legacy/pytorch_quantization_facade.py deleted file mode 100644 index a28b9d7d7..000000000 --- a/model_compression_toolkit/legacy/pytorch_quantization_facade.py +++ /dev/null @@ -1,284 +0,0 @@ -# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -from typing import Callable, List, Tuple - -from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer -from model_compression_toolkit.logger import Logger -from model_compression_toolkit.constants import PYTORCH -from model_compression_toolkit.core.common.user_info import UserInformation -from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, GradientPTQConfigV2 -from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities -from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI -from model_compression_toolkit.core.common.framework_info import FrameworkInfo -from model_compression_toolkit.core.common.network_editors.actions import EditRule -from model_compression_toolkit.core.common.quantization.core_config import CoreConfig -from model_compression_toolkit.core.common.quantization.debug_config import DebugConfig -from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ - MixedPrecisionQuantizationConfig, DEFAULT_MIXEDPRECISION_CONFIG -from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig -from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG -from model_compression_toolkit.core.runner import core_runner -from model_compression_toolkit.gptq.runner import gptq_runner -from model_compression_toolkit.ptq.runner import ptq_runner -from model_compression_toolkit.core.exporter import export_model -from model_compression_toolkit.core.analyzer import analyzer_model_quantization -from model_compression_toolkit.constants import FOUND_TORCH - -if FOUND_TORCH: - from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO - from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation - from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL - from torch.nn import Module - - from model_compression_toolkit import get_target_platform_capabilities - DEFAULT_PYTORCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL) - - def pytorch_post_training_quantization(in_module: Module, - representative_data_gen: Callable, - n_iter: int = 500, - quant_config: QuantizationConfig = DEFAULTCONFIG, - fw_info: FrameworkInfo = DEFAULT_PYTORCH_INFO, - network_editor: List[EditRule] = [], - gptq_config: GradientPTQConfig = None, - analyze_similarity: bool = False, - target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC) -> Tuple[Module, UserInformation]: - """ - Quantize a trained Pytorch module using post-training quantization. - By default, the module is quantized using a symmetric constraint quantization thresholds - (power of two) as defined in the default TargetPlatformCapabilities. - The module is first optimized using several transformations (e.g. BatchNormalization folding to - preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are - being collected for each layer's output (and input, depends on the quantization configuration). - Thresholds are then being calculated using the collected statistics and the module is quantized - (both coefficients and activations by default). 
- If gptq_config is passed, the quantized weights are optimized using gradient based post - training quantization by comparing points between the float and quantized modules, and minimizing the - observed loss. - - Args: - in_module (Module): Pytorch module to quantize. - representative_data_gen (Callable): Dataset used for calibration. - n_iter (int): Number of calibration iterations to run. - quant_config (QuantizationConfig): QuantizationConfig containing parameters of how the module should be quantized. `Default configuration. `_ - fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). `Default PyTorch info `_ - network_editor (List[EditRule]): List of EditRules. Each EditRule consists of a node filter and an action to change quantization settings of the filtered nodes. - gptq_config (GradientPTQConfig): Configuration for using gptq (e.g. optimizer). - analyze_similarity (bool): Whether to plot similarity figures within TensorBoard (when logger is enabled) or not. - target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the PyTorch model according to. - - - Returns: - A quantized module and information the user may need to handle the quantized module. - - Examples: - - Import a Pytorch module: - - >>> import torchvision.models.mobilenet_v2 as models - >>> module = models.mobilenet_v2() - - Create a random dataset generator: - - >>> import numpy as np - >>> def repr_datagen(): return [np.random.random((1,224,224,3))] - - Import mct and pass the module with the representative dataset generator to get a quantized module: - - >>> import model_compression_toolkit as mct - >>> quantized_module, quantization_info = mct.pytorch_post_training_quantization(module, repr_datagen) - - """ - Logger.warning('pytorch_post_training_quantization is deprecated and will be removed ' - 'in the future. 
Please use mct.ptq.pytorch_post_training_quantization_experimental instead.') - - core_config = CoreConfig(quant_config, - debug_config=DebugConfig(analyze_similarity=analyze_similarity, - network_editor=network_editor)) - - tb_w = init_tensorboard_writer(fw_info) - - fw_impl = PytorchImplementation() - - # convert old representative dataset generation to a generator - def _representative_data_gen(): - for _ in range(n_iter): - yield representative_data_gen() - - # Ignore trace hessian service as we do not use it here - tg, bit_widths_config, _ = core_runner(in_model=in_module, - representative_data_gen=_representative_data_gen, - core_config=core_config, - fw_info=fw_info, - fw_impl=fw_impl, - tpc=target_platform_capabilities, - tb_w=tb_w) - - if gptq_config is None: - tg = ptq_runner(tg, _representative_data_gen, core_config, fw_info, fw_impl, tb_w) - else: - gptq_config_v2 = GradientPTQConfigV2.from_v1(n_iter, gptq_config) - tg = gptq_runner(tg, core_config, gptq_config_v2, _representative_data_gen, _representative_data_gen, - fw_info, fw_impl, tb_w) - - if core_config.debug_config.analyze_similarity: - analyzer_model_quantization(_representative_data_gen, tb_w, tg, fw_impl, fw_info) - - quantized_model, user_info = export_model(tg, fw_info, fw_impl, tb_w, bit_widths_config) - - return quantized_model, user_info - - - def pytorch_post_training_quantization_mixed_precision(in_model: Module, - representative_data_gen: Callable, - target_kpi: KPI, - n_iter: int = 500, - quant_config: MixedPrecisionQuantizationConfig = DEFAULT_MIXEDPRECISION_CONFIG, - fw_info: FrameworkInfo = DEFAULT_PYTORCH_INFO, - network_editor: List[EditRule] = [], - gptq_config: GradientPTQConfig = None, - analyze_similarity: bool = False, - target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC) -> Tuple[Module, UserInformation]: - """ - Quantize a pretrained Pytorch model using post-training quantization. By default, the model is - quantized using a symmetric constraint quantization thresholds (power of two) as defined in the - default TargetPlatformCapabilities. - The model is first optimized using several transformations (e.g. BatchNormalization folding to - preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are - being collected for each layer's output (and input, depends on the quantization configuration). - For each possible bit width (per layer) a threshold is then being calculated using the collected - statistics. Then, using an ILP solver we find a mixed-precision configuration, and set a bit width - for each quantizer (for both activations and weights quantizers, by default). - In order to limit the maximal model's size, a target KPI need to be passed after weights_memory - is set (in bytes). - The model is then quantized (both coefficients and activations by default). - If gptq_config is passed, the quantized weights are optimized using gradient based post - training quantization by comparing points between the float and quantized models, and minimizing the - observed loss. - Notice that this feature is experimental. - - Args: - in_model (Model): Pytorch model to quantize. - representative_data_gen (Callable): Dataset used for calibration. - target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired. - n_iter (int): Number of calibration iterations to run. - quant_config (MixedPrecisionQuantizationConfig): QuantizationConfig containing parameters of how the model should be quantized. 
- fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). `Default PyTorch info `_ - network_editor (List[EditRule]): List of EditRules. Each EditRule consists of a node filter and an action to change quantization settings of the filtered nodes. - gptq_config (GradientPTQConfig): Configuration for using GPTQ (e.g. optimizer). - analyze_similarity (bool): Whether to plot similarity figures within TensorBoard (when logger is enabled) or not. - target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the PyTorch model according to. - - Returns: - A quantized model and information the user may need to handle the quantized model. - - Examples: - - Import MCT: - - >>> import model_compression_toolkit as mct - - Import a Pytorch model: - - >>> import torchvision.models.mobilenet_v2 as models - >>> module = models.mobilenet_v2() - - Create a random dataset generator: - - >>> import numpy as np - >>> def repr_datagen(): return [np.random.random((1,224,224,3))] - - Create a mixed-precision configuration, to quantize a model with different bitwidths for different layers. - The candidates bitwidth for quantization should be defined in the target platform model: - - >>> config = mct.core.MixedPrecisionQuantizationConfig() - - Create a KPI object to limit our returned model's size. Note that this value affects only coefficients - that should be quantized (for example, the kernel of Conv2D in PyTorch will be affected by this value, - while the bias will not): - - >>> kpi = mct.core.KPI(sum(p.numel() for p in module.parameters()) * 0.75) # About 0.75 of the model size when quantized with 8 bits. - - Pass the model, the representative dataset generator, the configuration and the target KPI to get a - quantized model: - - >>> quantized_model, quantization_info = mct.pytorch_post_training_quantization_mixed_precision(module, repr_datagen, n_iter=10, quant_config=config, target_kpi=kpi) - - For more configuration options, please take a look at our `API documentation `_. - - """ - - Logger.warning('pytorch_post_training_quantization_mixed_precision is deprecated and will be removed ' - 'in the future. Please use mct.ptq.pytorch_post_training_quantization_experimental instead.') - - if not isinstance(quant_config, MixedPrecisionQuantizationConfig): - Logger.error("Given quantization config to mixed-precision facade is not of type " - "MixedPrecisionQuantizationConfig. Please use pytorch_post_training_quantization API, " - "or pass a valid mixed precision configuration.") - - Logger.info("Using experimental mixed-precision quantization. 
" - "If you encounter an issue please file a bug.") - - quantization_config, mp_config = quant_config.separate_configs() - core_config = CoreConfig(quantization_config=quantization_config, - mixed_precision_config=mp_config, - debug_config=DebugConfig(analyze_similarity=analyze_similarity, - network_editor=network_editor)) - - tb_w = init_tensorboard_writer(fw_info) - - fw_impl = PytorchImplementation() - - # convert old representative dataset generation to a generator - def _representative_data_gen(): - for _ in range(n_iter): - yield representative_data_gen() - - # Ignore hessian service as it is not used here - tg, bit_widths_config, _ = core_runner(in_model=in_model, - representative_data_gen=_representative_data_gen, - core_config=core_config, - fw_info=fw_info, - fw_impl=fw_impl, - tpc=target_platform_capabilities, - target_kpi=target_kpi, - tb_w=tb_w) - - if gptq_config is None: - tg = ptq_runner(tg, _representative_data_gen, core_config, fw_info, fw_impl, tb_w) - else: - gptq_config_v2 = GradientPTQConfigV2.from_v1(n_iter, gptq_config) - tg = gptq_runner(tg, core_config, gptq_config_v2, _representative_data_gen, _representative_data_gen, - fw_info, fw_impl, tb_w) - - if core_config.debug_config.analyze_similarity: - analyzer_model_quantization(_representative_data_gen, tb_w, tg, fw_impl, fw_info) - - quantized_model, user_info = export_model(tg, fw_info, fw_impl, tb_w, bit_widths_config) - - return quantized_model, user_info - - -else: - # If torch is not installed, - # we raise an exception when trying to use these functions. - def pytorch_post_training_quantization(*args, **kwargs): - Logger.critical('Installing Pytorch is mandatory ' - 'when using pytorch_post_training_quantization. ' - 'Could not find the torch package.') # pragma: no cover - - def pytorch_post_training_quantization_mixed_precision(*args, **kwargs): - Logger.critical('Installing tensorflow is mandatory ' - 'when using pytorch_post_training_quantization_mixed_precision. ' - 'Could not find Tensorflow package.') # pragma: no cover diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/old_api_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/old_api_test.py deleted file mode 100644 index 4e93706be..000000000 --- a/tests/keras_tests/feature_networks_tests/feature_networks/old_api_test.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import tensorflow as tf -import numpy as np - -import model_compression_toolkit as mct -from tests.common_tests.helpers.generate_test_tp_model import generate_test_op_qc, generate_test_attr_configs -from tests.keras_tests.feature_networks_tests.base_keras_feature_test import BaseKerasFeatureNetworkTest -from tests.keras_tests.tpc_keras import get_weights_only_mp_tpc_keras -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs - - -keras = tf.keras -layers = keras.layers - - -class OldApiTest(BaseKerasFeatureNetworkTest): - def __init__(self, unit_test, mp_enable=False, gptq_enable=False): - super().__init__(unit_test, val_batch_size=1, num_calibration_iter=100) - self.mp_enable = mp_enable - self.gptq_enable = gptq_enable - - def get_tpc(self): - base_config = generate_test_op_qc(activation_n_bits=16, - **generate_test_attr_configs(default_cfg_nbits=16, - kernel_cfg_nbits=16)) - - default_config = base_config.clone_and_edit(attr_weights_configs_mapping={}) - - return get_weights_only_mp_tpc_keras(base_config=base_config, - default_config=default_config, - mp_bitwidth_candidates_list=[(8, 16), (2, 16), (4, 16), (16, 16)], - name="old_api_test") - - def get_kpi(self): - return mct.KPI() - - def create_networks(self): - inputs = layers.Input(shape=self.get_input_shapes()[0][1:]) - outputs = layers.Conv2D(1, 1)(inputs) - outputs = layers.ReLU()(outputs) - return keras.Model(inputs=inputs, outputs=outputs) - - def run_test(self, experimental_exporter=False): - model_float = self.create_networks() - core_config = self.get_core_config() - quant_config = core_config.quantization_config - gptq_config = mct.GradientPTQConfig(1, keras.optimizers.Adam(learning_rate=1e-12)) if self.gptq_enable else None - if self.mp_enable: - quant_config = mct.MixedPrecisionQuantizationConfig(quant_config, num_of_images=1) - facade_fn = mct.keras_post_training_quantization_mixed_precision - ptq_model, quantization_info = facade_fn(model_float, - self.representative_data_gen, - self.get_kpi(), - n_iter=self.num_calibration_iter, - quant_config=quant_config, - gptq_config=gptq_config, - target_platform_capabilities=self.get_tpc(), - ) - else: - facade_fn = mct.keras_post_training_quantization - ptq_model, quantization_info = facade_fn(model_float, - self.representative_data_gen, - n_iter=self.num_calibration_iter, - quant_config=quant_config, - gptq_config=gptq_config, - target_platform_capabilities=self.get_tpc(), - ) - - self.compare(ptq_model, model_float, input_x=self.representative_data_gen(), - quantization_info=quantization_info) - - def compare(self, quant_model, float_model, input_x=None, quantization_info=None): - out_float = float_model(input_x[0]).numpy() - out_quant = quant_model(input_x[0]).numpy() - self.unit_test.assertTrue(np.isclose(np.linalg.norm(np.abs(out_float-out_quant)), 0, atol=0.01)) diff --git a/tests/keras_tests/feature_networks_tests/test_features_runner.py b/tests/keras_tests/feature_networks_tests/test_features_runner.py index 0ef94b2ec..294c9be2c 100644 --- a/tests/keras_tests/feature_networks_tests/test_features_runner.py +++ b/tests/keras_tests/feature_networks_tests/test_features_runner.py @@ -124,7 +124,6 @@ MixedPercisionSearchKPI4BitsAvgTest, MixedPercisionSearchKPI2BitsAvgTest, MixedPrecisionActivationDisabled, \ MixedPercisionSearchLastLayerDistanceTest, MixedPercisionSearchActivationKPINonConfNodesTest, \ 
MixedPercisionSearchTotalKPINonConfNodesTest, MixedPercisionSearchPartWeightsLayersTest, MixedPercisionCombinedNMSTest -from tests.keras_tests.feature_networks_tests.feature_networks.old_api_test import OldApiTest from tests.keras_tests.feature_networks_tests.feature_networks.matmul_substitution_test import MatmulToDenseSubstitutionTest from tests.keras_tests.feature_networks_tests.feature_networks.const_representation_test import ConstRepresentationTest, \ ConstRepresentationMultiInputTest, ConstRepresentationMatMulTest @@ -744,12 +743,6 @@ def test_qat(self): QATWrappersMixedPrecisionCfgTest(self).run_test() QATWrappersMixedPrecisionCfgTest(self,kpi_weights=17920 * 4 / 8, kpi_activation=5408 * 4 / 8, expected_mp_cfg=[0, 4, 1, 1]).run_test() - def test_old_api(self): - OldApiTest(self).run_test() - OldApiTest(self, mp_enable=True).run_test() - OldApiTest(self, mp_enable=True, gptq_enable=True).run_test() - OldApiTest(self, gptq_enable=True).run_test() - if __name__ == '__main__': unittest.main() diff --git a/tests/pytorch_tests/model_tests/feature_models/old_api_test.py b/tests/pytorch_tests/model_tests/feature_models/old_api_test.py deleted file mode 100644 index f4b008b41..000000000 --- a/tests/pytorch_tests/model_tests/feature_models/old_api_test.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import torch -import numpy as np - -import model_compression_toolkit as mct -from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, torch_tensor_to_numpy, set_model -from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, WEIGHTS_N_BITS -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import generate_tp_model -from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest -from tests.pytorch_tests.tpc_pytorch import get_pytorch_test_tpc_dict -from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import get_op_quantization_configs - - -class OneLayerConv2dNet(torch.nn.Module): - def __init__(self): - super(OneLayerConv2dNet, self).__init__() - self.conv1 = torch.nn.Conv2d(3, 3, 1, bias=True) - self.relu = torch.nn.ReLU() - - def forward(self, x): - x = self.conv1(x) - x = self.relu(x) - return x - - -class OldApiTest(BasePytorchTest): - def __init__(self, unit_test, mp_enable=False, gptq_enable=False): - super().__init__(unit_test) - self.num_calibration_iter = 100 - self.mp_enable = mp_enable - self.gptq_enable = gptq_enable - self.input_shape = [(1, 3, 8, 8)] - - def get_mp_tpc(self): - base_config, _, default_config = get_op_quantization_configs() - base_config = base_config.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 16}}, - activation_n_bits=16) - default_config = default_config.clone_and_edit(activation_n_bits=16) - - mp_bitwidth_candidates_list = [(8, 16), (2, 16), (4, 16), (16, 16)] - mp_op_cfg_list = [] - for weights_n_bits, activation_n_bits in mp_bitwidth_candidates_list: - candidate_cfg = base_config.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: weights_n_bits}}, - activation_n_bits=activation_n_bits) - mp_op_cfg_list.append(candidate_cfg) - - tp_model = generate_tp_model(default_config=default_config, - base_config=base_config, - mixed_precision_cfg_list=mp_op_cfg_list, - name='default_tp_model') - return get_pytorch_test_tpc_dict(tp_model=tp_model, - test_name='mixed_precision_model', - ftp_name='mixed_precision_pytorch_test') - - def get_mp_quant_config(self): - qc = mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.MSE, - mct.core.QuantizationErrorMethod.MSE, - weights_bias_correction=True, - activation_channel_equalization=False, - relu_bound_to_power_of_2=False, - input_scaling=False) - return mct.core.MixedPrecisionQuantizationConfig(qc, num_of_images=1) - - def get_kpi(self): - return mct.core.KPI() - - def create_networks(self): - return OneLayerConv2dNet() - - def get_gptq_config(self): - return mct.gptq.GradientPTQConfig(5, optimizer=torch.optim.Adam([torch.Tensor([])], lr=1e-14)) - - def generate_inputs(self): - return to_torch_tensor([torch.randn(*self.input_shape) for in_shape in self.input_shape]) - - def run_test(self, experimental_exporter=False): - model_float = self.create_networks() - core_config = self.get_core_config() - quant_config = core_config.quantization_config - gptq_config = self.get_gptq_config() if self.gptq_enable else None - if self.mp_enable: - quant_config = self.get_mp_quant_config() - facade_fn = mct.pytorch_post_training_quantization_mixed_precision - ptq_model, quantization_info = facade_fn(model_float, - self.representative_data_gen, - self.get_kpi(), - n_iter=self.num_calibration_iter, - quant_config=quant_config, - gptq_config=gptq_config, - 
target_platform_capabilities=self.get_mp_tpc()['mixed_precision_model'], - ) - else: - facade_fn = mct.pytorch_post_training_quantization - ptq_model, quantization_info = facade_fn(model_float, - self.representative_data_gen, - n_iter=self.num_calibration_iter, - quant_config=quant_config, - gptq_config=gptq_config, - target_platform_capabilities=self.get_tpc()['all_32bit'], - ) - - self.compare(ptq_model, model_float, input_x=self.representative_data_gen(), - quantization_info=quantization_info) - - def compare(self, quant_model, float_model, input_x=None, quantization_info=None): - set_model(float_model) - out_float = torch_tensor_to_numpy(float_model(input_x[0])) - out_quant = torch_tensor_to_numpy(quant_model(input_x[0])) - self.unit_test.assertTrue(np.isclose(np.linalg.norm(np.abs(out_float-out_quant)), 0, atol=0.01)) diff --git a/tests/pytorch_tests/model_tests/test_feature_models_runner.py b/tests/pytorch_tests/model_tests/test_feature_models_runner.py index c4e5ac7ea..135b7ce0a 100644 --- a/tests/pytorch_tests/model_tests/test_feature_models_runner.py +++ b/tests/pytorch_tests/model_tests/test_feature_models_runner.py @@ -82,7 +82,6 @@ GPTQLearnRateZeroTest from tests.pytorch_tests.model_tests.feature_models.uniform_activation_test import \ UniformActivationTest -from tests.pytorch_tests.model_tests.feature_models.old_api_test import OldApiTest from tests.pytorch_tests.model_tests.feature_models.const_representation_test import ConstRepresentationTest, \ ConstRepresentationMultiInputTest from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod @@ -570,12 +569,6 @@ def test_qat(self): QuantizationAwareTrainingMixedPrecisionCfgTest(self).run_test() QuantizationAwareTrainingMixedPrecisionKpiCfgTest(self).run_test() - def test_old_api(self): - OldApiTest(self).run_test() - OldApiTest(self, mp_enable=True).run_test() - OldApiTest(self, mp_enable=True, gptq_enable=True).run_test() - OldApiTest(self, gptq_enable=True).run_test() - if __name__ == '__main__': unittest.main()
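
Migration note: the legacy facades deleted above are superseded by the experimental APIs named in their own deprecation warnings. A minimal Keras migration sketch follows, assuming the `mct.ptq.keras_post_training_quantization_experimental` signature that those warnings point to; the key difference is that the replacement takes a data generator instead of the old `n_iter` + callable pair (the deleted facades performed exactly this conversion internally via their `_representative_data_gen` wrappers):

>>> import numpy as np
>>> import model_compression_toolkit as mct
>>> from tensorflow.keras.applications.mobilenet import MobileNet
>>> model = MobileNet()
>>> def repr_datagen():
...     # Yields calibration batches; the generator's length replaces the old n_iter argument.
...     for _ in range(10):
...         yield [np.random.random((1, 224, 224, 3))]
>>> quantized_model, quantization_info = mct.ptq.keras_post_training_quantization_experimental(model, repr_datagen)

The PyTorch call is analogous, again assuming the signature referenced by the deprecation warning in the deleted facade:

>>> import torchvision.models.mobilenet_v2 as models
>>> module = models.mobilenet_v2()
>>> quantized_module, quantization_info = mct.ptq.pytorch_post_training_quantization_experimental(module, repr_datagen)

Since both mixed-precision deprecation warnings point at the same experimental entry points, mixed-precision search is presumably requested through these facades as well — by passing a `target_kpi` (e.g. `mct.core.KPI(...)`) and a `CoreConfig` carrying a mixed-precision configuration — rather than through the removed `*_mixed_precision` functions.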