diff --git a/docsrc/images/tpc_diagram.png b/docsrc/images/tpc_diagram.png
new file mode 100644
index 000000000..3f172b5e9
Binary files /dev/null and b/docsrc/images/tpc_diagram.png differ
diff --git a/docsrc/source/api/api_docs/classes/GradientPTQConfig.rst b/docsrc/source/api/api_docs/classes/GradientPTQConfig.rst
index c14ec1e7c..711c4f2d1 100644
--- a/docsrc/source/api/api_docs/classes/GradientPTQConfig.rst
+++ b/docsrc/source/api/api_docs/classes/GradientPTQConfig.rst
@@ -8,7 +8,7 @@ GradientPTQConfig Class
 =================================
 
-**The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float Keras model) to a student (the quantized Keras model)**
+**The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float model) to a student (the quantized model)**
 
 .. autoclass:: model_compression_toolkit.gptq.GradientPTQConfig
    :members:
@@ -30,3 +30,22 @@ RoundingType
 
 .. autoclass:: model_compression_toolkit.gptq.RoundingType
    :members:
+
+
+=====================================
+GradualActivationQuantizationConfig
+=====================================
+
+**The following API can be used to configure gradual activation quantization when using GPTQ.**
+
+.. autoclass:: model_compression_toolkit.gptq.GradualActivationQuantizationConfig
+   :members:
+
+
+=====================================
+QFractionLinearAnnealingConfig
+=====================================
+
+.. autoclass:: model_compression_toolkit.gptq.QFractionLinearAnnealingConfig
+   :members:
+
diff --git a/docsrc/source/api/api_docs/modules/target_platform_capabilities.rst b/docsrc/source/api/api_docs/modules/target_platform_capabilities.rst
index bbd025ff7..5e0dd9252 100644
--- a/docsrc/source/api/api_docs/modules/target_platform_capabilities.rst
+++ b/docsrc/source/api/api_docs/modules/target_platform_capabilities.rst
@@ -27,7 +27,7 @@
 Models for IMX500, TFLite and qnnpack can be observed `here
+            Kernel, Bias"]
+
+            OQC1 -->|contains| AQC
+            OQC2 -->|contains| AQC
+            OQC3 -->|contains| AQC
+        end
+
+        Mixed -->|contains| OQC1
+        Mixed -->|contains| OQC2
+        Mixed -->|contains| OQC3
+    end
+
+    subgraph OPS["Operators Sets"]
+        Conv["Conv, Conv Transpose,
+        Depthwise Conv"]
+
+        Act["ReLU, ReLU6,
+        Leaky ReLU, etc."]
+
+        NoQuantOps["Dropout, Flatten,
+        Reshape, etc."]
+
+    end
+
+    subgraph FP["Fusing Patterns"]
+        FP1["Conv + Activation"]
+    end
+
+    Mixed -->|attached to| Conv
+    Default -->|attached to| Act
+    NoQuant -->|attached to| NoQuantOps
+
+    FP1 -.-> Conv
+    FP1 -.-> Act
+end
+
+style TPC fill:#e6f3ff,stroke:#333
+style QCO fill:#e6ffe6,stroke:#333
+style OQC fill:#fff9e6,stroke:#333
+style OPS fill:#ffe6e6,stroke:#333
+style FP fill:#ffe6f0,stroke:#333
diff --git a/model_compression_toolkit/core/common/quantization/quantization_config.py b/model_compression_toolkit/core/common/quantization/quantization_config.py
index 5c8df6b33..8591e6f13 100644
--- a/model_compression_toolkit/core/common/quantization/quantization_config.py
+++ b/model_compression_toolkit/core/common/quantization/quantization_config.py
@@ -78,9 +78,6 @@ class QuantizationConfig:
 
     >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING, weights_error_method=mct.core.QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=True, weights_bias_correction=True)
 
-    The QuantizationConfig instance can then be used in the quantization workflow,
-    such as with Keras in the function: :func:~model_compression_toolkit.ptq.keras_post_training_quantization`.
-
     """
 
     activation_error_method: QuantizationErrorMethod = QuantizationErrorMethod.MSE
diff --git a/model_compression_toolkit/gptq/pytorch/quantization_facade.py b/model_compression_toolkit/gptq/pytorch/quantization_facade.py
index c16ec0ab6..0ba7581a3 100644
--- a/model_compression_toolkit/gptq/pytorch/quantization_facade.py
+++ b/model_compression_toolkit/gptq/pytorch/quantization_facade.py
@@ -77,6 +77,7 @@ def get_pytorch_gptq_config(n_epochs: int,
         regularization_factor (float): A floating point number that defines the regularization factor.
         hessian_batch_size (int): Batch size for Hessian computation in Hessian-based weights GPTQ.
         use_hessian_sample_attention (bool): whether to use Sample-Layer Attention score for weighted loss.
+        gradual_activation_quantization (bool, GradualActivationQuantizationConfig): If False, gradual activation quantization is disabled. If True, it is enabled with the default settings. A GradualActivationQuantizationConfig object can be passed to use non-default settings.
 
     returns: a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
diff --git a/model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py b/model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py
index ba5e590c1..f68b72d0c 100644
--- a/model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py
+++ b/model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py
@@ -16,12 +16,9 @@
 from model_compression_toolkit.gptq import GradientPTQConfig
 from model_compression_toolkit.core import common
-from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.node_to_quantizer import \
-    get_activation_inferable_quantizer_kwargs
 from model_compression_toolkit.gptq.pytorch.quantizer.base_pytorch_gptq_quantizer import \
     BasePytorchGPTQTrainableQuantizer
 from mct_quantizers import QuantizationTarget
-from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
 from mct_quantizers.pytorch.quantizers import BasePyTorchInferableQuantizer
 from model_compression_toolkit.logger import Logger
diff --git a/model_compression_toolkit/qat/pytorch/quantization_facade.py b/model_compression_toolkit/qat/pytorch/quantization_facade.py
index 938471034..f58215ed7 100644
--- a/model_compression_toolkit/qat/pytorch/quantization_facade.py
+++ b/model_compression_toolkit/qat/pytorch/quantization_facade.py
@@ -110,7 +110,6 @@ def pytorch_quantization_aware_training_init_experimental(in_model: Module,
         User information that may be needed to handle the quantized model.
 
     Examples:
-
     Import MCT:
 
     >>> import model_compression_toolkit as mct
@@ -120,21 +119,19 @@
     >>> from torchvision.models import mobilenet_v2
     >>> model = mobilenet_v2(pretrained=True)
 
-    Create a random dataset generator, for required number of calibration iterations (num_calibration_batches):
-    In this example a random dataset of 10 batches each containing 4 images is used.
+    Create a random dataset generator for the required number of calibration iterations (num_calibration_batches). In this example, a random dataset of 10 batches, each containing 4 images, is used:
 
-     >>> import numpy as np
-     >>> num_calibration_batches = 10
-     >>> def repr_datagen():
-     >>>     for _ in range(num_calibration_batches):
-     >>>         yield [np.random.random((4, 3, 224, 224))]
+    >>> import numpy as np
+    >>> num_calibration_batches = 10
+    >>> def repr_datagen():
+    >>>     for _ in range(num_calibration_batches):
+    >>>         yield [np.random.random((4, 3, 224, 224))]
 
     Create a MCT core config, containing the quantization configuration:
 
     >>> config = mct.core.CoreConfig()
 
-    Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a
-    quantized model. Now the model contains quantizer wrappers for fine tunning the weights:
+    Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a quantized model. Now the model contains quantizer wrappers for fine-tuning the weights:
 
     >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config)
 
@@ -149,8 +146,8 @@
     if core_config.is_mixed_precision_enabled:
         if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
             Logger.critical("Given quantization config to mixed-precision facade is not of type "
-                                "MixedPrecisionQuantizationConfig. Please use pytorch_post_training_quantization API,"
-                                "or pass a valid mixed precision configuration.")
+                            "MixedPrecisionQuantizationConfig. Please use pytorch_post_training_quantization API, "
+                            "or pass a valid mixed precision configuration.")
 
     tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)
     fw_impl = PytorchImplementation()
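As a usage note for the mixed-precision validation above, a hedged sketch of a core config that passes the isinstance check (the num_of_images argument and the ResourceUtilization field are assumptions about the MCT API, not shown in this diff):

>>> import model_compression_toolkit as mct
>>> # A valid mixed-precision configuration, wrapped in a CoreConfig:
>>> mp_cfg = mct.core.MixedPrecisionQuantizationConfig(num_of_images=32)
>>> core_cfg = mct.core.CoreConfig(mixed_precision_config=mp_cfg)
>>> # Give mixed precision a memory budget to optimize for (value is illustrative):
>>> ru = mct.core.ResourceUtilization(weights_memory=3 * 10**6)
>>> qat_model, qat_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, target_resource_utilization=ru, core_config=core_cfg)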