From 79a800ca901d408b92036486493fd59ace810536 Mon Sep 17 00:00:00 2001 From: reuvenp Date: Thu, 7 Mar 2024 16:55:01 +0200 Subject: [PATCH 1/9] remove tfmot - docsrc --- docsrc/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docsrc/source/index.rst b/docsrc/source/index.rst index bd91fe81f..c6a9b0be0 100644 --- a/docsrc/source/index.rst +++ b/docsrc/source/index.rst @@ -41,7 +41,7 @@ A nightly version is also available (unstable): For using with Tensorflow please install the packages: `tensorflow `_ -`tensorflow-model-optimization `_ + For using with Pytorch please install the package: `torch `_ From 3dd18320fec3ec847bd872858be4c0ae59e64002 Mon Sep 17 00:00:00 2001 From: reuvenp Date: Thu, 7 Mar 2024 17:11:20 +0200 Subject: [PATCH 2/9] update mct internal packaging - docsrc --- .../api/api_docs/methods/keras_kpi_data.rst | 2 +- .../keras_post_training_quantization.rst | 2 +- ..._training_quantization_mixed_precision.rst | 10 -------- .../api/api_docs/methods/pytorch_kpi_data.rst | 2 +- .../pytorch_post_training_quantization.rst | 2 +- ..._training_quantization_mixed_precision.rst | 10 -------- .../mixed_precision_quantization_config.rst | 4 ++-- .../classes/FolderImageLoader.rst | 2 +- .../classes/FrameworkInfo.rst | 4 ++-- .../classes/GradientPTQConfig.rst | 9 ------- .../keras_data_generation_experimental.rst | 4 ++-- ...ost_training_quantization_experimental.rst | 4 ++-- .../methods/keras_kpi_data_experimental.rst | 4 ++-- ...ost_training_quantization_experimental.rst | 4 ++-- ...s_quantization_aware_training_finalize.rst | 4 ++-- ...keras_quantization_aware_training_init.rst | 4 ++-- ...ost_training_quantization_experimental.rst | 4 ++-- .../methods/pytorch_kpi_data_experimental.rst | 4 ++-- ...ost_training_quantization_experimental.rst | 4 ++-- ...h_quantization_aware_training_finalize.rst | 4 ++-- ...torch_quantization_aware_training_init.rst | 4 ++-- .../modules/debug_config.rst | 2 +- .../mixed_precision_quantization_config.rst | 4 ++-- .../modules/network_editor.rst | 24 +++++++++---------- .../modules/quantization_config.rst | 4 ++-- 25 files changed, 48 insertions(+), 77 deletions(-) delete mode 100644 docsrc/source/api/api_docs/methods/keras_post_training_quantization_mixed_precision.rst delete mode 100644 docsrc/source/api/api_docs/methods/pytorch_post_training_quantization_mixed_precision.rst diff --git a/docsrc/source/api/api_docs/methods/keras_kpi_data.rst b/docsrc/source/api/api_docs/methods/keras_kpi_data.rst index d1af078dc..a0205814f 100644 --- a/docsrc/source/api/api_docs/methods/keras_kpi_data.rst +++ b/docsrc/source/api/api_docs/methods/keras_kpi_data.rst @@ -7,4 +7,4 @@ Get KPI information for Keras Models ======================================= -.. autofunction:: model_compression_toolkit.keras_kpi_data +.. autofunction:: model_compression_toolkit.core.keras_kpi_data diff --git a/docsrc/source/api/api_docs/methods/keras_post_training_quantization.rst b/docsrc/source/api/api_docs/methods/keras_post_training_quantization.rst index 9a14b4510..82a351716 100644 --- a/docsrc/source/api/api_docs/methods/keras_post_training_quantization.rst +++ b/docsrc/source/api/api_docs/methods/keras_post_training_quantization.rst @@ -7,4 +7,4 @@ Keras Post Training Quantization ================================= -.. autofunction:: model_compression_toolkit.keras_post_training_quantization +.. 
autofunction:: model_compression_toolkit.ptq.keras_post_training_quantization diff --git a/docsrc/source/api/api_docs/methods/keras_post_training_quantization_mixed_precision.rst b/docsrc/source/api/api_docs/methods/keras_post_training_quantization_mixed_precision.rst deleted file mode 100644 index 650cc4c70..000000000 --- a/docsrc/source/api/api_docs/methods/keras_post_training_quantization_mixed_precision.rst +++ /dev/null @@ -1,10 +0,0 @@ -:orphan: - -.. _ug-keras_post_training_quantization_mixed_precision: - - -================================================================== -Keras Post Training Mixed Precision Quantization -================================================================== - -.. autofunction:: model_compression_toolkit.keras_post_training_quantization_mixed_precision diff --git a/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst b/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst index be83da8d2..86f512eae 100644 --- a/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst +++ b/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst @@ -7,4 +7,4 @@ Get KPI information for PyTorch Models ======================================= -.. autofunction:: model_compression_toolkit.pytorch_kpi_data +.. autofunction:: model_compression_toolkit.core.pytorch_kpi_data diff --git a/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization.rst b/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization.rst index e3ddfb201..46feca7ba 100644 --- a/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization.rst +++ b/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization.rst @@ -7,4 +7,4 @@ Pytorch Post Training Quantization ===================================== -.. autofunction:: model_compression_toolkit.pytorch_post_training_quantization +.. autofunction:: model_compression_toolkit.ptq.pytorch_post_training_quantization diff --git a/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization_mixed_precision.rst b/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization_mixed_precision.rst deleted file mode 100644 index 72d318522..000000000 --- a/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization_mixed_precision.rst +++ /dev/null @@ -1,10 +0,0 @@ -:orphan: - -.. _ug-pytorch_post_training_quantization_mixed_precision: - - -================================================================== -PyTorch Post Training Mixed Precision Quantization -================================================================== - -.. autofunction:: model_compression_toolkit.pytorch_post_training_quantization_mixed_precision diff --git a/docsrc/source/api/api_docs/modules/mixed_precision_quantization_config.rst b/docsrc/source/api/api_docs/modules/mixed_precision_quantization_config.rst index e48b66381..4745b0b69 100644 --- a/docsrc/source/api/api_docs/modules/mixed_precision_quantization_config.rst +++ b/docsrc/source/api/api_docs/modules/mixed_precision_quantization_config.rst @@ -12,7 +12,7 @@ KPI ================================ **Object to configure resources to use when searching for a mixed-precision configuration for a model:** -.. autoclass:: model_compression_toolkit.KPI +.. autoclass:: model_compression_toolkit.core.KPI :noindex: @@ -23,5 +23,5 @@ MixedPrecisionQuantizationConfig ================================= **Class to configure the quantization process of the model when quantizing in mixed-precision:** -.. autoclass:: model_compression_toolkit.MixedPrecisionQuantizationConfig +.. 
autoclass:: model_compression_toolkit.core.MixedPrecisionQuantizationConfig diff --git a/docsrc/source/api/experimental_api_docs/classes/FolderImageLoader.rst b/docsrc/source/api/experimental_api_docs/classes/FolderImageLoader.rst index e9fda9d43..6897e353f 100644 --- a/docsrc/source/api/experimental_api_docs/classes/FolderImageLoader.rst +++ b/docsrc/source/api/experimental_api_docs/classes/FolderImageLoader.rst @@ -8,7 +8,7 @@ Folder Image Loader API **The following API can be used to load a folder of images to create a representative dataset for PTQ calibration** -.. autoclass:: model_compression_toolkit.FolderImageLoader +.. autoclass:: model_compression_toolkit.core.FolderImageLoader :members: diff --git a/docsrc/source/api/experimental_api_docs/classes/FrameworkInfo.rst b/docsrc/source/api/experimental_api_docs/classes/FrameworkInfo.rst index 8aa3bcd52..2338a7dd4 100644 --- a/docsrc/source/api/experimental_api_docs/classes/FrameworkInfo.rst +++ b/docsrc/source/api/experimental_api_docs/classes/FrameworkInfo.rst @@ -9,7 +9,7 @@ FrameworkInfo Class **The following API can be used to pass MCT framework-related information to use when optimizing the network** -.. autoclass:: model_compression_toolkit.FrameworkInfo +.. autoclass:: model_compression_toolkit.core.FrameworkInfo | @@ -19,5 +19,5 @@ ChannelAxis ========================== **Enum to select the output channels format in the model:** -.. autoclass:: model_compression_toolkit.ChannelAxis +.. autoclass:: model_compression_toolkit.core.ChannelAxis diff --git a/docsrc/source/api/experimental_api_docs/classes/GradientPTQConfig.rst b/docsrc/source/api/experimental_api_docs/classes/GradientPTQConfig.rst index caea133e0..11a79e1c8 100644 --- a/docsrc/source/api/experimental_api_docs/classes/GradientPTQConfig.rst +++ b/docsrc/source/api/experimental_api_docs/classes/GradientPTQConfig.rst @@ -2,15 +2,6 @@ .. _ug-GradientPTQConfig: -================================= -GradientPTQConfigV2 Class -================================= - -**The following API can be used to create a GradientPTQConfigV2 instance which can be used for post training quantization using knowledge distillation from a teacher (float model) to a student (the quantized model). This is experimental and subject to future changes.** - -.. autoclass:: model_compression_toolkit.gptq.GradientPTQConfigV2 - :members: - ================================= GradientPTQConfig Class diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_data_generation_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/keras_data_generation_experimental.rst index 20865d616..66dfa8dcf 100644 --- a/docsrc/source/api/experimental_api_docs/methods/keras_data_generation_experimental.rst +++ b/docsrc/source/api/experimental_api_docs/methods/keras_data_generation_experimental.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-tensorflow_data_generation_experimental: +.. _ug-keras_data_generation_experimental: ===================================== Keras Data Generation ===================================== -.. autofunction:: model_compression_toolkit.data_generation.tensorflow_data_generation_experimental +.. 
autofunction:: model_compression_toolkit.data_generation.keras_data_generation_experimental diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_gradient_post_training_quantization_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/keras_gradient_post_training_quantization_experimental.rst index fa99048f1..afd2eabf6 100644 --- a/docsrc/source/api/experimental_api_docs/methods/keras_gradient_post_training_quantization_experimental.rst +++ b/docsrc/source/api/experimental_api_docs/methods/keras_gradient_post_training_quantization_experimental.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-keras_gradient_post_training_quantization_experimental: +.. _ug-keras_gradient_post_training_quantization: ================================================ Keras Gradient Based Post Training Quantization ================================================ -.. autofunction:: model_compression_toolkit.gptq.keras_gradient_post_training_quantization_experimental +.. autofunction:: model_compression_toolkit.gptq.keras_gradient_post_training_quantization diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_kpi_data_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/keras_kpi_data_experimental.rst index 608808bca..a0205814f 100644 --- a/docsrc/source/api/experimental_api_docs/methods/keras_kpi_data_experimental.rst +++ b/docsrc/source/api/experimental_api_docs/methods/keras_kpi_data_experimental.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-keras_kpi_data_experimental: +.. _ug-keras_kpi_data: ======================================= Get KPI information for Keras Models ======================================= -.. autofunction:: model_compression_toolkit.core.keras_kpi_data_experimental +.. autofunction:: model_compression_toolkit.core.keras_kpi_data diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_post_training_quantization_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/keras_post_training_quantization_experimental.rst index ca1e912ee..82a351716 100644 --- a/docsrc/source/api/experimental_api_docs/methods/keras_post_training_quantization_experimental.rst +++ b/docsrc/source/api/experimental_api_docs/methods/keras_post_training_quantization_experimental.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-keras_post_training_quantization_experimental: +.. _ug-keras_post_training_quantization: ================================= Keras Post Training Quantization ================================= -.. autofunction:: model_compression_toolkit.ptq.keras_post_training_quantization_experimental +.. autofunction:: model_compression_toolkit.ptq.keras_post_training_quantization diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_finalize.rst b/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_finalize.rst index 1e2bac84a..54ff74a3f 100644 --- a/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_finalize.rst +++ b/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_finalize.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-keras_quantization_aware_training_finalize: +.. _ug-keras_quantization_aware_training_finalize_experimental: ================================================ Keras Quantization Aware Training Model Finalize ================================================ -.. autofunction:: model_compression_toolkit.qat.keras_quantization_aware_training_finalize +.. 
autofunction:: model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_init.rst b/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_init.rst index f055e1621..b89c3367e 100644 --- a/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_init.rst +++ b/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_init.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-keras_quantization_aware_training_init: +.. _ug-keras_quantization_aware_training_init_experimental: ================================================ Keras Quantization Aware Training Model Init ================================================ -.. autofunction:: model_compression_toolkit.qat.keras_quantization_aware_training_init +.. autofunction:: model_compression_toolkit.qat.keras_quantization_aware_training_init_experimental diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst index 4c30d210b..95dcf10f0 100644 --- a/docsrc/source/api/experimental_api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst +++ b/docsrc/source/api/experimental_api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-pytorch_gradient_post_training_quantization_experimental: +.. _ug-pytorch_gradient_post_training_quantization: ==================================================== Pytorch Gradient Based Post Training Quantization ==================================================== -.. autofunction:: model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization_experimental +.. autofunction:: model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_kpi_data_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/pytorch_kpi_data_experimental.rst index 155a7e1c0..86f512eae 100644 --- a/docsrc/source/api/experimental_api_docs/methods/pytorch_kpi_data_experimental.rst +++ b/docsrc/source/api/experimental_api_docs/methods/pytorch_kpi_data_experimental.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-pytorch_kpi_data_experimental: +.. _ug-pytorch_kpi_data: ======================================= Get KPI information for PyTorch Models ======================================= -.. autofunction:: model_compression_toolkit.core.pytorch_kpi_data_experimental +.. autofunction:: model_compression_toolkit.core.pytorch_kpi_data diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_post_training_quantization_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/pytorch_post_training_quantization_experimental.rst index 48fcdfa6f..46feca7ba 100644 --- a/docsrc/source/api/experimental_api_docs/methods/pytorch_post_training_quantization_experimental.rst +++ b/docsrc/source/api/experimental_api_docs/methods/pytorch_post_training_quantization_experimental.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-pytorch_post_training_quantization_experimental: +.. _ug-pytorch_post_training_quantization: ===================================== Pytorch Post Training Quantization ===================================== -.. autofunction:: model_compression_toolkit.ptq.pytorch_post_training_quantization_experimental +.. 
autofunction:: model_compression_toolkit.ptq.pytorch_post_training_quantization diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_finalize.rst b/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_finalize.rst index b1e5dcd8c..e391be8d7 100644 --- a/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_finalize.rst +++ b/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_finalize.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-pytorch_quantization_aware_training_finalize: +.. _ug-pytorch_quantization_aware_training_finalize_experimental: =================================================== PyTorch Quantization Aware Training Model Finalize =================================================== -.. autofunction:: model_compression_toolkit.qat.pytorch_quantization_aware_training_finalize +.. autofunction:: model_compression_toolkit.qat.pytorch_quantization_aware_training_finalize_experimental diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_init.rst b/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_init.rst index 899379d6b..4e14c7478 100644 --- a/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_init.rst +++ b/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_init.rst @@ -1,10 +1,10 @@ :orphan: -.. _ug-pytorch_quantization_aware_training_init: +.. _ug-pytorch_quantization_aware_training_init_experimental: ================================================ PyTorch Quantization Aware Training Model Init ================================================ -.. autofunction:: model_compression_toolkit.qat.pytorch_quantization_aware_training_init +.. autofunction:: model_compression_toolkit.qat.pytorch_quantization_aware_training_init_experimental diff --git a/docsrc/source/api/experimental_api_docs/modules/debug_config.rst b/docsrc/source/api/experimental_api_docs/modules/debug_config.rst index f1da2735b..16654f433 100644 --- a/docsrc/source/api/experimental_api_docs/modules/debug_config.rst +++ b/docsrc/source/api/experimental_api_docs/modules/debug_config.rst @@ -12,6 +12,6 @@ DebugConfig ========================== **Class to configure params for debugging purposes:** -.. autoclass:: model_compression_toolkit.DebugConfig +.. autoclass:: model_compression_toolkit.core.DebugConfig diff --git a/docsrc/source/api/experimental_api_docs/modules/mixed_precision_quantization_config.rst b/docsrc/source/api/experimental_api_docs/modules/mixed_precision_quantization_config.rst index 996479716..f3d6aba8c 100644 --- a/docsrc/source/api/experimental_api_docs/modules/mixed_precision_quantization_config.rst +++ b/docsrc/source/api/experimental_api_docs/modules/mixed_precision_quantization_config.rst @@ -12,7 +12,7 @@ KPI ================================ **Object to configure resources to use when searching for a mixed-precision configuration for a model:** -.. autoclass:: model_compression_toolkit.KPI +.. autoclass:: model_compression_toolkit.core.KPI | @@ -22,5 +22,5 @@ MixedPrecisionQuantizationConfigV2 =================================== **Class to configure the quantization process of the model when quantizing in mixed-precision:** -.. autoclass:: model_compression_toolkit.core.MixedPrecisionQuantizationConfigV2 +.. 
autoclass:: model_compression_toolkit.core.MixedPrecisionQuantizationConfig diff --git a/docsrc/source/api/experimental_api_docs/modules/network_editor.rst b/docsrc/source/api/experimental_api_docs/modules/network_editor.rst index a97594e39..8df62a390 100644 --- a/docsrc/source/api/experimental_api_docs/modules/network_editor.rst +++ b/docsrc/source/api/experimental_api_docs/modules/network_editor.rst @@ -11,52 +11,52 @@ network_editor Module EditRule ========== -.. autoclass:: model_compression_toolkit.network_editor.EditRule +.. autoclass:: model_compression_toolkit.core.network_editor.EditRule Filters ========== -.. autoclass:: model_compression_toolkit.network_editor.NodeTypeFilter +.. autoclass:: model_compression_toolkit.core.network_editor.NodeTypeFilter | -.. autoclass:: model_compression_toolkit.network_editor.NodeNameFilter +.. autoclass:: model_compression_toolkit.core.network_editor.NodeNameFilter | -.. autoclass:: model_compression_toolkit.network_editor.NodeNameScopeFilter +.. autoclass:: model_compression_toolkit.core.network_editor.NodeNameScopeFilter Actions ========== -.. autoclass:: model_compression_toolkit.network_editor.ChangeFinalWeightsQuantConfigAttr +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantConfigAttr | -.. autoclass:: model_compression_toolkit.network_editor.ChangeCandidatesWeightsQuantConfigAttr +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr | -.. autoclass:: model_compression_toolkit.network_editor.ChangeFinalActivationQuantConfigAttr +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeFinalActivationQuantConfigAttr | -.. autoclass:: model_compression_toolkit.network_editor.ChangeCandidatesActivationQuantConfigAttr +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantConfigAttr | -.. autoclass:: model_compression_toolkit.network_editor.ChangeQuantizationParamFunction +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeQuantizationParamFunction | -.. autoclass:: model_compression_toolkit.network_editor.ChangeFinalWeightsQuantizationMethod +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantizationMethod | -.. autoclass:: model_compression_toolkit.network_editor.ChangeCandidatesWeightsQuantizationMethod +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantizationMethod | -.. autoclass:: model_compression_toolkit.network_editor.ChangeCandidatesActivationQuantizationMethod +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantizationMethod diff --git a/docsrc/source/api/experimental_api_docs/modules/quantization_config.rst b/docsrc/source/api/experimental_api_docs/modules/quantization_config.rst index 68b9599eb..7997f6f1b 100644 --- a/docsrc/source/api/experimental_api_docs/modules/quantization_config.rst +++ b/docsrc/source/api/experimental_api_docs/modules/quantization_config.rst @@ -11,7 +11,7 @@ QuantizationErrorMethod ========================== **Enum to select a method for quantization parameters' selection:** -.. autoclass:: model_compression_toolkit.QuantizationErrorMethod +.. autoclass:: model_compression_toolkit.core.QuantizationErrorMethod | @@ -21,5 +21,5 @@ QuantizationConfig ========================== **Class to configure the quantization process of the model:** -.. autoclass:: model_compression_toolkit.QuantizationConfig +.. 
autoclass:: model_compression_toolkit.core.QuantizationConfig From 0e242ac2948c6078df1311a239031a57173657e4 Mon Sep 17 00:00:00 2001 From: reuvenp Date: Thu, 7 Mar 2024 17:52:05 +0200 Subject: [PATCH 3/9] update docsrc --- .../classes/DefaultDict.rst | 0 .../classes/FolderImageLoader.rst | 0 .../classes/FrameworkInfo.rst | 0 .../classes/GradientPTQConfig.rst | 0 .../api/api_docs/classes/PruningConfig.rst | 9 +++ .../api/api_docs/classes/PruningInfo.rst | 10 +++ docsrc/source/api/api_docs/index.rst | 54 ++++++++++---- .../get_keras_data_generation_config.rst | 0 .../methods/get_keras_gptq_config.rst | 0 .../get_pytorch_data_generation_config.rst | 0 .../methods/get_pytroch_gptq_config.rst | 0 .../get_target_platform_capabilities.rst | 0 .../keras_data_generation_experimental.rst | 0 ...ost_training_quantization_experimental.rst | 0 .../api/api_docs/methods/keras_kpi_data.rst | 10 --- .../methods/keras_kpi_data_experimental.rst | 0 .../methods/keras_load_quantizad_model.rst | 0 .../keras_post_training_quantization.rst | 10 --- ...ost_training_quantization_experimental.rst | 0 .../methods/keras_pruning_experimental.rst | 12 ++++ ...s_quantization_aware_training_finalize.rst | 0 ...keras_quantization_aware_training_init.rst | 0 .../pytorch_data_generation_experimental.rst | 0 ...ost_training_quantization_experimental.rst | 0 .../api/api_docs/methods/pytorch_kpi_data.rst | 10 --- .../methods/pytorch_kpi_data_experimental.rst | 0 .../pytorch_post_training_quantization.rst | 10 --- ...ost_training_quantization_experimental.rst | 0 .../methods/pytorch_pruning_experimental.rst | 15 ++++ ...h_quantization_aware_training_finalize.rst | 0 ...torch_quantization_aware_training_init.rst | 0 .../methods/set_logger_path.rst | 0 .../modules/core_config.rst | 0 .../modules/debug_config.rst | 0 .../modules/exporter.rst | 0 .../modules/layer_filters.rst | 0 .../mixed_precision_quantization_config.rst | 11 ++- .../modules/network_editor.rst | 0 .../modules/qat_config.rst | 0 .../modules/quantization_config.rst | 0 .../modules/target_platform.rst | 0 .../modules/trainable_infrastructure.rst | 0 .../notes/experimental_api_note.rst | 0 .../notes/tpc_note.rst | 0 .../api/experimental_api_docs/index.rst | 70 ------------------- .../methods/keras_pruning_experimental.rst | 25 ------- .../methods/pytorch_pruning_experimental.rst | 25 ------- .../mixed_precision_quantization_config.rst | 26 ------- docsrc/source/index.rst | 26 +++---- 49 files changed, 105 insertions(+), 218 deletions(-) rename docsrc/source/api/{experimental_api_docs => api_docs}/classes/DefaultDict.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/classes/FolderImageLoader.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/classes/FrameworkInfo.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/classes/GradientPTQConfig.rst (100%) create mode 100644 docsrc/source/api/api_docs/classes/PruningConfig.rst create mode 100644 docsrc/source/api/api_docs/classes/PruningInfo.rst rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/get_keras_data_generation_config.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/get_keras_gptq_config.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/get_pytorch_data_generation_config.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/get_pytroch_gptq_config.rst (100%) rename docsrc/source/api/{experimental_api_docs => 
api_docs}/methods/get_target_platform_capabilities.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/keras_data_generation_experimental.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/keras_gradient_post_training_quantization_experimental.rst (100%) delete mode 100644 docsrc/source/api/api_docs/methods/keras_kpi_data.rst rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/keras_kpi_data_experimental.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/keras_load_quantizad_model.rst (100%) delete mode 100644 docsrc/source/api/api_docs/methods/keras_post_training_quantization.rst rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/keras_post_training_quantization_experimental.rst (100%) create mode 100644 docsrc/source/api/api_docs/methods/keras_pruning_experimental.rst rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/keras_quantization_aware_training_finalize.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/keras_quantization_aware_training_init.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/pytorch_data_generation_experimental.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/pytorch_gradient_post_training_quantization_experimental.rst (100%) delete mode 100644 docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/pytorch_kpi_data_experimental.rst (100%) delete mode 100644 docsrc/source/api/api_docs/methods/pytorch_post_training_quantization.rst rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/pytorch_post_training_quantization_experimental.rst (100%) create mode 100644 docsrc/source/api/api_docs/methods/pytorch_pruning_experimental.rst rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/pytorch_quantization_aware_training_finalize.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/pytorch_quantization_aware_training_init.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/methods/set_logger_path.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/core_config.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/debug_config.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/exporter.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/layer_filters.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/network_editor.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/qat_config.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/quantization_config.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/target_platform.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/modules/trainable_infrastructure.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/notes/experimental_api_note.rst (100%) rename docsrc/source/api/{experimental_api_docs => api_docs}/notes/tpc_note.rst (100%) delete mode 100644 docsrc/source/api/experimental_api_docs/index.rst delete mode 100644 docsrc/source/api/experimental_api_docs/methods/keras_pruning_experimental.rst delete mode 100644 docsrc/source/api/experimental_api_docs/methods/pytorch_pruning_experimental.rst delete mode 
100644 docsrc/source/api/experimental_api_docs/modules/mixed_precision_quantization_config.rst diff --git a/docsrc/source/api/experimental_api_docs/classes/DefaultDict.rst b/docsrc/source/api/api_docs/classes/DefaultDict.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/classes/DefaultDict.rst rename to docsrc/source/api/api_docs/classes/DefaultDict.rst diff --git a/docsrc/source/api/experimental_api_docs/classes/FolderImageLoader.rst b/docsrc/source/api/api_docs/classes/FolderImageLoader.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/classes/FolderImageLoader.rst rename to docsrc/source/api/api_docs/classes/FolderImageLoader.rst diff --git a/docsrc/source/api/experimental_api_docs/classes/FrameworkInfo.rst b/docsrc/source/api/api_docs/classes/FrameworkInfo.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/classes/FrameworkInfo.rst rename to docsrc/source/api/api_docs/classes/FrameworkInfo.rst diff --git a/docsrc/source/api/experimental_api_docs/classes/GradientPTQConfig.rst b/docsrc/source/api/api_docs/classes/GradientPTQConfig.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/classes/GradientPTQConfig.rst rename to docsrc/source/api/api_docs/classes/GradientPTQConfig.rst diff --git a/docsrc/source/api/api_docs/classes/PruningConfig.rst b/docsrc/source/api/api_docs/classes/PruningConfig.rst new file mode 100644 index 000000000..0b57b743a --- /dev/null +++ b/docsrc/source/api/api_docs/classes/PruningConfig.rst @@ -0,0 +1,9 @@ +:orphan: + +.. _ug-PruningConfig: + +================================================ +Pruning Configuration +================================================ + +.. autofunction:: model_compression_toolkit.pruning.PruningConfig \ No newline at end of file diff --git a/docsrc/source/api/api_docs/classes/PruningInfo.rst b/docsrc/source/api/api_docs/classes/PruningInfo.rst new file mode 100644 index 000000000..7bd02336c --- /dev/null +++ b/docsrc/source/api/api_docs/classes/PruningInfo.rst @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-PruningInfo: + +================================================ +Pruning Information +================================================ + +.. autofunction:: model_compression_toolkit.pruning.PruningInfo + diff --git a/docsrc/source/api/api_docs/index.rst b/docsrc/source/api/api_docs/index.rst index 28f078466..072f21f1e 100644 --- a/docsrc/source/api/api_docs/index.rst +++ b/docsrc/source/api/api_docs/index.rst @@ -7,8 +7,6 @@ API Docs ========= -.. note:: This API will be removed in future releases. Please switch to the :ref:`new API` - **Init module for MCT API.** .. code-block:: python @@ -17,30 +15,60 @@ API Docs | +.. include:: ./notes/experimental_api_note.rst + +| + + Functions ========= -- :ref:`pytorch_post_training_quantization`: Function to use for post training quantization of Pytorch models. -- :ref:`pytorch_post_training_quantization_mixed_precision`: Function to use for mixed-precision post training quantization of Pytorch models (experimental). -- :ref:`keras_post_training_quantization`: Function to use for post training quantization of Keras models. -- :ref:`keras_post_training_quantization_mixed_precision`: Function to use for mixed-precision post training quantization of Keras models (experimental). -- :ref:`get_keras_gptq_config`: Function to create a GradientPTQConfig instance to use for Keras models when using GPTQ (experimental). 
-- :ref:`get_target_platform_capabilities`: Function to get a target platform model for Tensorflow and Pytorch. -- :ref:`keras_kpi_data`: Function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of Keras models. -- :ref:`pytorch_kpi_data`: Function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of PyTorch models. +- :ref:`pytorch_post_training_quantization`: A function to use for post training quantization of PyTorch models. +- :ref:`keras_post_training_quantization`: A function to use for post training quantization of Keras models. + +- :ref:`keras_gradient_post_training_quantization`: A function to use for gradient-based post training quantization of Keras models. +- :ref:`get_keras_gptq_config`: A function to create a GradientPTQConfig instance to use for Keras models when using GPTQ. + +- :ref:`pytorch_gradient_post_training_quantization`: A function to use for gradient-based post training quantization of Pytorch models. +- :ref:`get_pytorch_gptq_config`: A function to create a GradientPTQConfig instance to use for Pytorch models when using GPTQ. + +- :ref:`keras_quantization_aware_training_init`: A function to use for preparing a model for Quantization Aware Training (Experimental) +- :ref:`keras_quantization_aware_training_finalize`: A function to finalize a model after Quantization Aware Training to a model without QuantizeWrappers (Experimental) + +- :ref:`keras_data_generation_experimental`: A function to generate data for a Keras model (experimental). +- :ref:`get_keras_data_generation_config`: A function to generate a DataGenerationConfig for Tensorflow data generation(experimental). + +- :ref:`pytorch_data_generation_experimental`: A function to generate data for a Pytorch model (experimental). +- :ref:`get_pytorch_data_generation_config`: A function to load a DataGenerationConfig for Pytorch data generation (experimental). + +- :ref:`keras_pruning_experimental`: A function to apply structured pruning for Keras models (experimental). +- :ref:`pytorch_pruning_experimental`: A function to apply structured pruning for Pytorch models (experimental). + +- :ref:`keras_kpi_data`: A function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of Keras models. +- :ref:`pytorch_kpi_data`: A function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of PyTorch models. + +- :ref:`get_target_platform_capabilities`: A function to get a target platform model for Tensorflow and Pytorch. +- :ref:`keras_load_quantized_model`: A function to load a quantized keras model. Modules ========= +- :ref:`core_config`: Module to contain configurations of the optimization process. - :ref:`quantization_config`: Module to configure the quantization process. -- :ref:`mixed_precision_quantization_config`: Module to configure the quantization process when using mixed-precision PTQ. -- :ref:`network_editor`: Module to edit your model during the quantization process. +- :ref:`mixed_precision_quantization_config`: Module to configure the quantization process when using mixed-precision PTQ. +- :ref:`debug_config`: Module to configure options for debugging the optimization process. - :ref:`target_platform`: Module to create and model hardware-related settings to optimize the model according to, by the hardware the optimized model will use during inference. 
+- :ref:`qat_config`: Module to create quantization configuration for Quantization-aware Training. +- :ref:`exporter`: Module that enables to export a quantized model in different serialization formats. +- :ref:`trainable_infrastructure`: Module that contains quantization abstraction and quantizers for hardware-oriented model optimization tools. Classes ========= -- :ref:`GradientPTQConfig`: Class to configure GradientPTQC options for gradient based post training quantization. +- :ref:`GradientPTQConfig`: Class to configure GradientPTQ options for gradient based post training quantization. - :ref:`FolderImageLoader`: Class to use an images directory as a representative dataset. - :ref:`FrameworkInfo`: Class to wrap framework information to be used by MCT when optimizing models. +- :ref:`PruningConfig`: PruningConfig +- :ref:`PruningInfo`: PruningInfo + Indices and tables ================== diff --git a/docsrc/source/api/experimental_api_docs/methods/get_keras_data_generation_config.rst b/docsrc/source/api/api_docs/methods/get_keras_data_generation_config.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/get_keras_data_generation_config.rst rename to docsrc/source/api/api_docs/methods/get_keras_data_generation_config.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/get_keras_gptq_config.rst b/docsrc/source/api/api_docs/methods/get_keras_gptq_config.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/get_keras_gptq_config.rst rename to docsrc/source/api/api_docs/methods/get_keras_gptq_config.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/get_pytorch_data_generation_config.rst b/docsrc/source/api/api_docs/methods/get_pytorch_data_generation_config.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/get_pytorch_data_generation_config.rst rename to docsrc/source/api/api_docs/methods/get_pytorch_data_generation_config.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/get_pytroch_gptq_config.rst b/docsrc/source/api/api_docs/methods/get_pytroch_gptq_config.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/get_pytroch_gptq_config.rst rename to docsrc/source/api/api_docs/methods/get_pytroch_gptq_config.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/get_target_platform_capabilities.rst b/docsrc/source/api/api_docs/methods/get_target_platform_capabilities.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/get_target_platform_capabilities.rst rename to docsrc/source/api/api_docs/methods/get_target_platform_capabilities.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_data_generation_experimental.rst b/docsrc/source/api/api_docs/methods/keras_data_generation_experimental.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/keras_data_generation_experimental.rst rename to docsrc/source/api/api_docs/methods/keras_data_generation_experimental.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_gradient_post_training_quantization_experimental.rst b/docsrc/source/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/keras_gradient_post_training_quantization_experimental.rst rename to docsrc/source/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.rst diff --git 
a/docsrc/source/api/api_docs/methods/keras_kpi_data.rst b/docsrc/source/api/api_docs/methods/keras_kpi_data.rst deleted file mode 100644 index a0205814f..000000000 --- a/docsrc/source/api/api_docs/methods/keras_kpi_data.rst +++ /dev/null @@ -1,10 +0,0 @@ -:orphan: - -.. _ug-keras_kpi_data: - - -======================================= -Get KPI information for Keras Models -======================================= - -.. autofunction:: model_compression_toolkit.core.keras_kpi_data diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_kpi_data_experimental.rst b/docsrc/source/api/api_docs/methods/keras_kpi_data_experimental.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/keras_kpi_data_experimental.rst rename to docsrc/source/api/api_docs/methods/keras_kpi_data_experimental.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_load_quantizad_model.rst b/docsrc/source/api/api_docs/methods/keras_load_quantizad_model.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/keras_load_quantizad_model.rst rename to docsrc/source/api/api_docs/methods/keras_load_quantizad_model.rst diff --git a/docsrc/source/api/api_docs/methods/keras_post_training_quantization.rst b/docsrc/source/api/api_docs/methods/keras_post_training_quantization.rst deleted file mode 100644 index 82a351716..000000000 --- a/docsrc/source/api/api_docs/methods/keras_post_training_quantization.rst +++ /dev/null @@ -1,10 +0,0 @@ -:orphan: - -.. _ug-keras_post_training_quantization: - - -================================= -Keras Post Training Quantization -================================= - -.. autofunction:: model_compression_toolkit.ptq.keras_post_training_quantization diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_post_training_quantization_experimental.rst b/docsrc/source/api/api_docs/methods/keras_post_training_quantization_experimental.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/keras_post_training_quantization_experimental.rst rename to docsrc/source/api/api_docs/methods/keras_post_training_quantization_experimental.rst diff --git a/docsrc/source/api/api_docs/methods/keras_pruning_experimental.rst b/docsrc/source/api/api_docs/methods/keras_pruning_experimental.rst new file mode 100644 index 000000000..508a11e47 --- /dev/null +++ b/docsrc/source/api/api_docs/methods/keras_pruning_experimental.rst @@ -0,0 +1,12 @@ +:orphan: + +.. _ug-keras_pruning_experimental: + + +================================================ +Keras Structured Pruning +================================================ + +.. 
autofunction:: model_compression_toolkit.pruning.keras_pruning_experimental + + diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_finalize.rst b/docsrc/source/api/api_docs/methods/keras_quantization_aware_training_finalize.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_finalize.rst rename to docsrc/source/api/api_docs/methods/keras_quantization_aware_training_finalize.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_init.rst b/docsrc/source/api/api_docs/methods/keras_quantization_aware_training_init.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/keras_quantization_aware_training_init.rst rename to docsrc/source/api/api_docs/methods/keras_quantization_aware_training_init.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_data_generation_experimental.rst b/docsrc/source/api/api_docs/methods/pytorch_data_generation_experimental.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/pytorch_data_generation_experimental.rst rename to docsrc/source/api/api_docs/methods/pytorch_data_generation_experimental.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst b/docsrc/source/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst rename to docsrc/source/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst diff --git a/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst b/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst deleted file mode 100644 index 86f512eae..000000000 --- a/docsrc/source/api/api_docs/methods/pytorch_kpi_data.rst +++ /dev/null @@ -1,10 +0,0 @@ -:orphan: - -.. _ug-pytorch_kpi_data: - - -======================================= -Get KPI information for PyTorch Models -======================================= - -.. autofunction:: model_compression_toolkit.core.pytorch_kpi_data diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_kpi_data_experimental.rst b/docsrc/source/api/api_docs/methods/pytorch_kpi_data_experimental.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/pytorch_kpi_data_experimental.rst rename to docsrc/source/api/api_docs/methods/pytorch_kpi_data_experimental.rst diff --git a/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization.rst b/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization.rst deleted file mode 100644 index 46feca7ba..000000000 --- a/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization.rst +++ /dev/null @@ -1,10 +0,0 @@ -:orphan: - -.. _ug-pytorch_post_training_quantization: - - -===================================== -Pytorch Post Training Quantization -===================================== - -.. 
autofunction:: model_compression_toolkit.ptq.pytorch_post_training_quantization diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_post_training_quantization_experimental.rst b/docsrc/source/api/api_docs/methods/pytorch_post_training_quantization_experimental.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/pytorch_post_training_quantization_experimental.rst rename to docsrc/source/api/api_docs/methods/pytorch_post_training_quantization_experimental.rst diff --git a/docsrc/source/api/api_docs/methods/pytorch_pruning_experimental.rst b/docsrc/source/api/api_docs/methods/pytorch_pruning_experimental.rst new file mode 100644 index 000000000..4f050cfe4 --- /dev/null +++ b/docsrc/source/api/api_docs/methods/pytorch_pruning_experimental.rst @@ -0,0 +1,15 @@ +:orphan: + +.. _ug-pytorch_pruning_experimental: + + +================================================ +Pytorch Structured Pruning +================================================ + +.. autofunction:: model_compression_toolkit.pruning.pytorch_pruning_experimental + + + + + diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_finalize.rst b/docsrc/source/api/api_docs/methods/pytorch_quantization_aware_training_finalize.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_finalize.rst rename to docsrc/source/api/api_docs/methods/pytorch_quantization_aware_training_finalize.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_init.rst b/docsrc/source/api/api_docs/methods/pytorch_quantization_aware_training_init.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/pytorch_quantization_aware_training_init.rst rename to docsrc/source/api/api_docs/methods/pytorch_quantization_aware_training_init.rst diff --git a/docsrc/source/api/experimental_api_docs/methods/set_logger_path.rst b/docsrc/source/api/api_docs/methods/set_logger_path.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/methods/set_logger_path.rst rename to docsrc/source/api/api_docs/methods/set_logger_path.rst diff --git a/docsrc/source/api/experimental_api_docs/modules/core_config.rst b/docsrc/source/api/api_docs/modules/core_config.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/core_config.rst rename to docsrc/source/api/api_docs/modules/core_config.rst diff --git a/docsrc/source/api/experimental_api_docs/modules/debug_config.rst b/docsrc/source/api/api_docs/modules/debug_config.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/debug_config.rst rename to docsrc/source/api/api_docs/modules/debug_config.rst diff --git a/docsrc/source/api/experimental_api_docs/modules/exporter.rst b/docsrc/source/api/api_docs/modules/exporter.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/exporter.rst rename to docsrc/source/api/api_docs/modules/exporter.rst diff --git a/docsrc/source/api/experimental_api_docs/modules/layer_filters.rst b/docsrc/source/api/api_docs/modules/layer_filters.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/layer_filters.rst rename to docsrc/source/api/api_docs/modules/layer_filters.rst diff --git a/docsrc/source/api/api_docs/modules/mixed_precision_quantization_config.rst b/docsrc/source/api/api_docs/modules/mixed_precision_quantization_config.rst index 
4745b0b69..f3d6aba8c 100644 --- a/docsrc/source/api/api_docs/modules/mixed_precision_quantization_config.rst +++ b/docsrc/source/api/api_docs/modules/mixed_precision_quantization_config.rst @@ -1,11 +1,11 @@ :orphan: -.. _ug-mixed_precision_quantization_config: +.. _ug-mixed_precision_quantization_config_v2: -=========================================== +=========================================================== mixed_precision_quantization_config Module -=========================================== +=========================================================== KPI @@ -13,14 +13,13 @@ KPI **Object to configure resources to use when searching for a mixed-precision configuration for a model:** .. autoclass:: model_compression_toolkit.core.KPI - :noindex: | -MixedPrecisionQuantizationConfig -================================= +MixedPrecisionQuantizationConfigV2 +=================================== **Class to configure the quantization process of the model when quantizing in mixed-precision:** .. autoclass:: model_compression_toolkit.core.MixedPrecisionQuantizationConfig diff --git a/docsrc/source/api/experimental_api_docs/modules/network_editor.rst b/docsrc/source/api/api_docs/modules/network_editor.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/network_editor.rst rename to docsrc/source/api/api_docs/modules/network_editor.rst diff --git a/docsrc/source/api/experimental_api_docs/modules/qat_config.rst b/docsrc/source/api/api_docs/modules/qat_config.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/qat_config.rst rename to docsrc/source/api/api_docs/modules/qat_config.rst diff --git a/docsrc/source/api/experimental_api_docs/modules/quantization_config.rst b/docsrc/source/api/api_docs/modules/quantization_config.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/quantization_config.rst rename to docsrc/source/api/api_docs/modules/quantization_config.rst diff --git a/docsrc/source/api/experimental_api_docs/modules/target_platform.rst b/docsrc/source/api/api_docs/modules/target_platform.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/target_platform.rst rename to docsrc/source/api/api_docs/modules/target_platform.rst diff --git a/docsrc/source/api/experimental_api_docs/modules/trainable_infrastructure.rst b/docsrc/source/api/api_docs/modules/trainable_infrastructure.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/modules/trainable_infrastructure.rst rename to docsrc/source/api/api_docs/modules/trainable_infrastructure.rst diff --git a/docsrc/source/api/experimental_api_docs/notes/experimental_api_note.rst b/docsrc/source/api/api_docs/notes/experimental_api_note.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/notes/experimental_api_note.rst rename to docsrc/source/api/api_docs/notes/experimental_api_note.rst diff --git a/docsrc/source/api/experimental_api_docs/notes/tpc_note.rst b/docsrc/source/api/api_docs/notes/tpc_note.rst similarity index 100% rename from docsrc/source/api/experimental_api_docs/notes/tpc_note.rst rename to docsrc/source/api/api_docs/notes/tpc_note.rst diff --git a/docsrc/source/api/experimental_api_docs/index.rst b/docsrc/source/api/experimental_api_docs/index.rst deleted file mode 100644 index 12e023554..000000000 --- a/docsrc/source/api/experimental_api_docs/index.rst +++ /dev/null @@ -1,70 +0,0 @@ -:orphan: - -.. 
_ug-experimental_api_docs: - - -========= -API Docs -========= - -**Init module for MCT API.** - -.. code-block:: python - - import model_compression_toolkit as mct - -| - -.. include:: ./notes/experimental_api_note.rst - -| - - -Functions -========= -- :ref:`pytorch_post_training_quantization_experimental`: A function to use for post training quantization of PyTorch models (experimental). -- :ref:`keras_post_training_quantization_experimental`: A function to use for post training quantization of Keras models (experimental). -- :ref:`keras_gradient_post_training_quantization_experimental`: A function to use for gradient-based post training quantization of Keras models (experimental). -- :ref:`pytorch_gradient_post_training_quantization_experimental`: A function to use for gradient-based post training quantization of Pytorch models (experimental). -- :ref:`keras_quantization_aware_training_init`: A function to use for preparing a model for Quantization Aware Training (Experimental) -- :ref:`keras_quantization_aware_training_finalize`: A function to finalize a model after Quantization Aware Training to a model without QuantizeWrappers(Experimental) -- :ref:`get_keras_gptq_config`: A function to create a GradientPTQConfig instance to use for Keras models when using GPTQ (experimental). -- :ref:`get_pytorch_gptq_config`: A function to create a GradientPTQConfig instance to use for Pytorch models when using GPTQ (experimental). -- :ref:`get_target_platform_capabilities`: A function to get a target platform model for Tensorflow and Pytorch. -- :ref:`keras_kpi_data_experimental`: A function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of Keras models (experimental). -- :ref:`pytorch_kpi_data_experimental`: A function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of PyTorch models (experimental). -- :ref:`keras_load_quantized_model`: A function to load a quantized keras model (experimental). -- :ref:`tensorflow_data_generation_experimental`: A function to generate data for a Keras model (experimental). -- :ref:`get_keras_data_generation_config`: A function to generate a DataGenerationConfig for Tensorflow data generation(experimental). -- :ref:`pytorch_data_generation_experimental`: A function to generate data for a Pytorch model (experimental). -- :ref:`get_pytorch_data_generation_config`: A function to load a DataGenerationConfig for Pytorch data generation (experimental). -- :ref:`keras_pruning_experimental`: A function to apply structured pruning for Keras models (experimental). - - -Modules -========= -- :ref:`core_config`: Module to contain configurations of the optimization process. -- :ref:`quantization_config`: Module to configure the quantization process. -- :ref:`mixed_precision_quantization_config`: Module to configure the quantization process when using mixed-precision PTQ. -- :ref:`debug_config`: Module to configure options for debugging the optimization process. -- :ref:`target_platform`: Module to create and model hardware-related settings to optimize the model according to, by the hardware the optimized model will use during inference. -- :ref:`qat_config`: Module to create quantization configuration for Quantization-aware Training. -- :ref:`exporter`: Module that enables to export a quantized model in different serialization formats. 
-- :ref:`trainable_infrastructure`: Module that contains quantization abstraction and quantizers for hardware-oriented model optimization tools. - -Classes -========= -- :ref:`GradientPTQConfig`: Class to configure GradientPTQ options for gradient based post training quantization. -- :ref:`FolderImageLoader`: Class to use an images directory as a representative dataset. -- :ref:`FrameworkInfo`: Class to wrap framework information to be used by MCT when optimizing models. - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - -.. note:: This documentation is auto-generated using Sphinx - diff --git a/docsrc/source/api/experimental_api_docs/methods/keras_pruning_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/keras_pruning_experimental.rst deleted file mode 100644 index 787bb2452..000000000 --- a/docsrc/source/api/experimental_api_docs/methods/keras_pruning_experimental.rst +++ /dev/null @@ -1,25 +0,0 @@ -:orphan: - -.. _ug-keras_pruning_experimental: - - -================================================ -Keras Structured Pruning -================================================ - -.. autofunction:: model_compression_toolkit.pruning.keras_pruning_experimental - -================================================ -Pruning Configuration -================================================ - -.. autofunction:: model_compression_toolkit.pruning.PruningConfig - - - -================================================ -Pruning Information -================================================ - -.. autofunction:: model_compression_toolkit.pruning.PruningInfo - diff --git a/docsrc/source/api/experimental_api_docs/methods/pytorch_pruning_experimental.rst b/docsrc/source/api/experimental_api_docs/methods/pytorch_pruning_experimental.rst deleted file mode 100644 index f52e130f5..000000000 --- a/docsrc/source/api/experimental_api_docs/methods/pytorch_pruning_experimental.rst +++ /dev/null @@ -1,25 +0,0 @@ -:orphan: - -.. _ug-pytorch_pruning_experimental: - - -================================================ -Pytorch Structured Pruning -================================================ - -.. autofunction:: model_compression_toolkit.pruning.pytorch_pruning_experimental - -================================================ -Pruning Configuration -================================================ - -.. autofunction:: model_compression_toolkit.pruning.PruningConfig - - - -================================================ -Pruning Information -================================================ - -.. autofunction:: model_compression_toolkit.pruning.PruningInfo - diff --git a/docsrc/source/api/experimental_api_docs/modules/mixed_precision_quantization_config.rst b/docsrc/source/api/experimental_api_docs/modules/mixed_precision_quantization_config.rst deleted file mode 100644 index f3d6aba8c..000000000 --- a/docsrc/source/api/experimental_api_docs/modules/mixed_precision_quantization_config.rst +++ /dev/null @@ -1,26 +0,0 @@ -:orphan: - -.. _ug-mixed_precision_quantization_config_v2: - - -=========================================================== -mixed_precision_quantization_config Module -=========================================================== - - -KPI -================================ -**Object to configure resources to use when searching for a mixed-precision configuration for a model:** - -.. 
autoclass:: model_compression_toolkit.core.KPI - - -| - - -MixedPrecisionQuantizationConfigV2 -=================================== -**Class to configure the quantization process of the model when quantizing in mixed-precision:** - -.. autoclass:: model_compression_toolkit.core.MixedPrecisionQuantizationConfig - diff --git a/docsrc/source/index.rst b/docsrc/source/index.rst index c6a9b0be0..2c4713162 100644 --- a/docsrc/source/index.rst +++ b/docsrc/source/index.rst @@ -52,21 +52,21 @@ Supported Features Keras: -* :ref:`Post Training Quantization` [1] -* :ref:`Gradient based post training using knowledge distillation` -* :ref:`Mixed-precision post training quantization` -* :ref:`Init model for Quantization Aware Training` (Experimental) -* :ref:`Finalize model after Quantization Aware Training` (Experimental) -* :ref:`Structured Pruning` (Experimental) +* :ref:`Post Training Quantization` [1] +* :ref:`Gradient based post training using knowledge distillation` +* :ref:`Init model for Quantization Aware Training` (Experimental) +* :ref:`Finalize model after Quantization Aware Training` (Experimental) +* :ref:`Structured pruning` (Experimental) +* :ref:`Data generation` (Experimental) Pytorch: -* :ref:`Post Training Quantization` [1] -* :ref:`Gradient based post training using knowledge distillation` -* :ref:`Mixed-precision post training quantization` -* :ref:`Init model for Quantization Aware Training` (Experimental) -* :ref:`Finalize model after Quantization Aware Training` (Experimental) - +* :ref:`Post Training Quantization` [1] +* :ref:`Gradient based post training using knowledge distillation` +* :ref:`Init model for Quantization Aware Training` (Experimental) +* :ref:`Finalize model after Quantization Aware Training` (Experimental) +* :ref:`Structured pruning` (Experimental) +* :ref:`Data generation` (Experimental) Visualization: @@ -101,7 +101,7 @@ Please visit the MCT API documentation here :titlesonly: :maxdepth: 1 - API Documentation<../api/experimental_api_docs/index> + API Documentation<../api/api_docs/index> Technical Constraints ========================= From 1ca7bbb7a8deffe159f993b3a0df78c8594e4683 Mon Sep 17 00:00:00 2001 From: reuvenp Date: Thu, 7 Mar 2024 17:52:56 +0200 Subject: [PATCH 4/9] fix comments of OperationsSetToLayers --- .../targetplatform2framework/operations_to_layers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py index 9fdc38dc7..6a1809f12 100644 --- a/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +++ b/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py @@ -35,9 +35,9 @@ def __init__(self, Args: op_set_name (str): Name of OperatorsSet to associate with layers. layers (List[Any]): List of layers/FilterLayerParams to associate with OperatorsSet. - attr_mapping (dict): A mapping between a general attribute name to a DefaultDict that maps a layer -+ type to the layer's framework name of this attribute (the dictionary type is not specified to -+ handle circular dependency). 
+ attr_mapping (dict): A mapping between a general attribute name to a DefaultDict that maps a layer type + to the layer's framework name of this attribute (the dictionary type is not specified to handle circular + dependency). """ self.layers = layers self.attr_mapping = attr_mapping From 385b9f4e7493a6355e94dc052c42a35367193531 Mon Sep 17 00:00:00 2001 From: reuvenp Date: Thu, 7 Mar 2024 18:06:11 +0200 Subject: [PATCH 5/9] automate makefile --- docsrc/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docsrc/Makefile b/docsrc/Makefile index a31637a91..b869c2d44 100644 --- a/docsrc/Makefile +++ b/docsrc/Makefile @@ -6,7 +6,17 @@ github: @cp -a ../docs/html/. ../docs @rm -rf ../docs/html/ @rm -rf ../docs/doctrees/ + @find ../docs -type f -exec sed -i 's|_images/|images/|g' {} + + @find ../docs -type f -exec sed -i 's|_static/|static/|g' {} + + @rm -rf ../docs/images + @rm -rf ../docs/static + @mv ../docs/_images/ ../docs/images + @mv ../docs/_static/ ../docs/static + + +#@find ../docs -type f -exec sed -i 's|_images/|images/|g' {} + +#@find ../docs -type f -exec sed -i 's|_static/|static/|g' {} + # rm -rf ../docs/images/ # rm -rf ../docs/statis/ # mv ../docs/_images/ ../docs/images From 2cc4a9b08d4c8337f8d5ac61457bab83a87d9399 Mon Sep 17 00:00:00 2001 From: reuvenp Date: Sat, 9 Mar 2024 20:08:28 +0200 Subject: [PATCH 6/9] Add export pytorch example --- docsrc/source/api/api_docs/index.rst | 4 - .../source/api/api_docs/modules/exporter.rst | 154 +++++++++++++++++- .../api_docs/notes/experimental_api_note.rst | 7 - 3 files changed, 153 insertions(+), 12 deletions(-) delete mode 100644 docsrc/source/api/api_docs/notes/experimental_api_note.rst diff --git a/docsrc/source/api/api_docs/index.rst b/docsrc/source/api/api_docs/index.rst index 072f21f1e..3b8eec621 100644 --- a/docsrc/source/api/api_docs/index.rst +++ b/docsrc/source/api/api_docs/index.rst @@ -15,10 +15,6 @@ API Docs | -.. include:: ./notes/experimental_api_note.rst - -| - Functions ========= diff --git a/docsrc/source/api/api_docs/modules/exporter.rst b/docsrc/source/api/api_docs/modules/exporter.rst index 987cbe483..7977cc700 100644 --- a/docsrc/source/api/api_docs/modules/exporter.rst +++ b/docsrc/source/api/api_docs/modules/exporter.rst @@ -9,7 +9,13 @@ exporter Module Allows to export a quantized model in different serialization formats and quantization formats. For more details about the export formats and options, please refer to the project's GitHub `README file `_. -Note that this feature is experimental and subject to future changes. If you have any questions or issues, please open an issue in this GitHub repository. +If you have any questions or issues, please open an issue in this GitHub repository. + + +QuantizationFormat +========================== + +.. autoclass:: model_compression_toolkit.exporter.QuantizationFormat KerasExportSerializationFormat @@ -39,3 +45,149 @@ Allows to export a Pytorch model that was quantized via MCT. .. autoclass:: model_compression_toolkit.exporter.pytorch_export_model + + +Pytorch Tutorial +========================== + +To export a Pytorch model as a quantized model, it is necessary to first apply quantization +to the model using MCT: + +.. code-block:: shell + + ! pip install -q mct-nightly + +In order to export your quantized model to ONNX format, and use it for inference, some additional packages are needed. Notice, this is needed only for models exported to ONNX format, so this part can be skipped if this is not planned: + +.. code-block:: shell + + ! 
pip install -q onnx onnxruntime onnxruntime-extensions
+
+Now, let's start the export demonstration by quantizing the model using MCT:
+
+.. code-block:: python
+
+    import model_compression_toolkit as mct
+    import numpy as np
+    import torch
+    from torchvision.models.mobilenetv2 import mobilenet_v2
+
+    # Create a model
+    float_model = mobilenet_v2()
+
+
+    # Notice that here the representative dataset is random for demonstration only.
+    def representative_data_gen():
+        yield [np.random.random((1, 3, 224, 224))]
+
+
+    quantized_exportable_model, _ = mct.ptq.pytorch_post_training_quantization(float_model, representative_data_gen=representative_data_gen)
+
+
+
+
+### ONNX
+
+The model will be exported in ONNX format where weights and activations are represented as float. Notice that `onnx` should be installed in order to export the model to an ONNX model.
+
+There are two optional formats to choose from: MCTQ or FAKELY_QUANT.
+
+#### MCTQ Quantization Format
+
+By default, `mct.exporter.pytorch_export_model` will export the quantized pytorch model to
+an ONNX model with custom quantizers from mct_quantizers module.
+
+
+
+.. code-block:: python
+
+    # Path of exported model
+    onnx_file_path = 'model_format_onnx_mctq.onnx'
+
+    # Export ONNX model with mctq quantizers.
+    mct.exporter.pytorch_export_model(model=quantized_exportable_model,
+                                      save_model_path=onnx_file_path,
+                                      repr_dataset=representative_data_gen)
+
+Notice that the model has the same size as the quantized exportable model as weights data types are float.
+
+#### ONNX opset version
+
+By default, the ONNX opset version used is 15, but this can be changed using `onnx_opset_version`:
+
+.. code-block:: python
+
+    # Export ONNX model with mctq quantizers.
+    mct.exporter.pytorch_export_model(model=quantized_exportable_model,
+                                      save_model_path=onnx_file_path,
+                                      repr_dataset=representative_data_gen,
+                                      onnx_opset_version=16)
+
+### Use exported model for inference
+
+To load and infer using the exported model, which was exported to an ONNX file in MCTQ format, we will use `mct_quantizers` method `get_ort_session_options` during onnxruntime session creation. **Notice**: inference on models that are exported in this format is slower and suffers from longer latency. However, inference of these models on IMX500 will not suffer from this issue.
+
+.. code-block:: python
+
+    import mct_quantizers as mctq
+    import onnxruntime as ort
+
+    sess = ort.InferenceSession(onnx_file_path,
+                                mctq.get_ort_session_options(),
+                                providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+
+    _input_data = next(representative_data_gen())[0].astype(np.float32)
+    _model_output_name = sess.get_outputs()[0].name
+    _model_input_name = sess.get_inputs()[0].name
+
+    # Run inference
+    predictions = sess.run([_model_output_name], {_model_input_name: _input_data})
+
+#### Fakely-Quantized
+
+To export a fakely-quantized model, use QuantizationFormat.FAKELY_QUANT:
+
+.. code-block:: python
+
+    import tempfile
+
+    # Path of exported model
+    _, onnx_file_path = tempfile.mkstemp('.onnx')
+
+    # Use QuantizationFormat.FAKELY_QUANT for fakely-quantized weights and activations.
+    mct.exporter.pytorch_export_model(model=quantized_exportable_model,
+                                      save_model_path=onnx_file_path,
+                                      repr_dataset=representative_data_gen,
+                                      quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)
+
+
+Notice that the fakely-quantized model has the same size as the quantized
+exportable model as weights data types are float.
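+
+The fakely-quantized model can then be run for inference as well; the following is a minimal sketch, assuming the fakely-quantized graph contains only standard ONNX operators and therefore needs no mct_quantizers session options (`onnx_file_path` and `representative_data_gen` are the objects defined above):
+
+.. code-block:: python
+
+    import numpy as np
+    import onnxruntime as ort
+
+    # The fakely-quantized graph is assumed to contain only standard ONNX ops,
+    # so a default onnxruntime session (without custom quantizers) can run it.
+    sess = ort.InferenceSession(onnx_file_path, providers=['CPUExecutionProvider'])
+
+    # Reuse the representative dataset from above as a sample input.
+    _input_data = next(representative_data_gen())[0].astype(np.float32)
+    _input_name = sess.get_inputs()[0].name
+
+    # Run inference on the fakely-quantized model.
+    predictions = sess.run(None, {_input_name: _input_data})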
+ +### TorchScript + +The model will be exported in TorchScript format where weights and activations are +quantized but represented as float (fakely quant). + +.. code-block:: python + + # Path of exported model + _, torchscript_file_path = tempfile.mkstemp('.pt') + + + # Use mode PytorchExportSerializationFormat.TORCHSCRIPT a torchscript model + # and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights and activations. + mct.exporter.pytorch_export_model(model=quantized_exportable_model, + save_model_path=torchscript_file_path, + repr_dataset=representative_data_gen, + serialization_format=mct.exporter.PytorchExportSerializationFormat.TORCHSCRIPT, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) + +Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are +float. + + + + + + diff --git a/docsrc/source/api/api_docs/notes/experimental_api_note.rst b/docsrc/source/api/api_docs/notes/experimental_api_note.rst deleted file mode 100644 index 4980a5344..000000000 --- a/docsrc/source/api/api_docs/notes/experimental_api_note.rst +++ /dev/null @@ -1,7 +0,0 @@ - -.. note:: - This is an experimental new API. The previous API is still available and can be used as before (please - visit :ref:`previous API documentation ` for more information). However, we recommend using the new API as the previous API is deprecated - and will be removed in the future. - - From 8de2704c59e6bae4fde328a025a8224c81ba5c42 Mon Sep 17 00:00:00 2001 From: reuvenp Date: Sat, 9 Mar 2024 20:22:19 +0200 Subject: [PATCH 7/9] add QuantizationFormat doc --- .../model_exporter/fw_agonstic/quantization_format.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/model_compression_toolkit/exporter/model_exporter/fw_agonstic/quantization_format.py b/model_compression_toolkit/exporter/model_exporter/fw_agonstic/quantization_format.py index dc9c7e965..b6fedc2b2 100644 --- a/model_compression_toolkit/exporter/model_exporter/fw_agonstic/quantization_format.py +++ b/model_compression_toolkit/exporter/model_exporter/fw_agonstic/quantization_format.py @@ -16,6 +16,16 @@ class QuantizationFormat(Enum): + """ + Specify which quantization format to use for exporting a quantized model. + + FAKELY_QUANT - Weights and activations are quantized but represented using float data type. + + INT8 - Weights and activations are represented using 8-bit integer data type. + + MCTQ - Weights and activations are quantized using mct_quantizers custom quantizers. + + """ FAKELY_QUANT = 0 INT8 = 1 MCTQ = 2 From 38eb005873713ac4c328b86176cc45334f7a4e44 Mon Sep 17 00:00:00 2001 From: reuvenp Date: Sat, 9 Mar 2024 20:25:46 +0200 Subject: [PATCH 8/9] fix exporter subtitles --- .../source/api/api_docs/modules/exporter.rst | 190 ++++++++++++++++-- 1 file changed, 175 insertions(+), 15 deletions(-) diff --git a/docsrc/source/api/api_docs/modules/exporter.rst b/docsrc/source/api/api_docs/modules/exporter.rst index 7977cc700..e441eee22 100644 --- a/docsrc/source/api/api_docs/modules/exporter.rst +++ b/docsrc/source/api/api_docs/modules/exporter.rst @@ -25,6 +25,159 @@ Select the serialization format for exporting a quantized Keras model. .. autoclass:: model_compression_toolkit.exporter.KerasExportSerializationFormat +keras_export_model +========================== +Allows to export a Keras model that was quantized via MCT. + +.. 
autoclass:: model_compression_toolkit.exporter.keras_export_model
+
+
+Keras Tutorial
+==========================
+To export a TensorFlow model as a quantized model, it is necessary to first apply quantization
+to the model using MCT:
+
+.. code-block:: shell
+
+    ! pip install -q mct-nightly
+
+.. code-block:: python
+
+    import numpy as np
+    from keras.applications import ResNet50
+    import model_compression_toolkit as mct
+
+    # Create a model
+    float_model = ResNet50()
+    # Quantize the model.
+    # Notice that here the representative dataset is random for demonstration only.
+    quantized_exportable_model, _ = mct.ptq.keras_post_training_quantization(float_model,
+                                                                             representative_data_gen=lambda: [np.random.random((1, 224, 224, 3))])
+
+
+++++++++++++++++++++++++++++
+keras serialization format
+++++++++++++++++++++++++++++
+The model will be exported as a tensorflow `.keras` model where weights and activations are quantized but represented using a float32 dtype.
+Two optional quantization formats are available: MCTQ and FAKELY_QUANT.
+
+++++
+MCTQ
+++++
+
+By default, `mct.exporter.keras_export_model` will export the quantized Keras model to
+a .keras model with custom quantizers from mct_quantizers module.
+
+.. code-block:: python
+
+    import tempfile
+
+    # Path of exported model
+    _, keras_file_path = tempfile.mkstemp('.keras')
+
+    # Export a keras model with mctq custom quantizers.
+    mct.exporter.keras_export_model(model=quantized_exportable_model,
+                                    save_model_path=keras_file_path)
+
+Notice that the model has the same size as the quantized exportable model as weights data types are float.
+
++++++++++++++++++++++++++++
+Fakely-Quantized in Keras
++++++++++++++++++++++++++++
+
+.. code-block:: python
+
+    # Path of exported model
+    _, keras_file_path = tempfile.mkstemp('.keras')
+
+    # Use mode KerasExportSerializationFormat.KERAS for a .keras model
+    # and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights
+    # and activations.
+    mct.exporter.keras_export_model(model=quantized_exportable_model,
+                                    save_model_path=keras_file_path,
+                                    quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)
+
+Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are
+float.
+
+++++++
+TFLite
+++++++
+
+The tflite serialization format exports in two quantization formats: INT8 and FAKELY_QUANT.
+
++++++++++++
+INT8 TFLite
++++++++++++
+
+The model will be exported as a tflite model where weights and activations are represented as 8bit integers.
+
+.. code-block:: python
+
+    import tempfile
+
+    # Path of exported model
+    _, tflite_file_path = tempfile.mkstemp('.tflite')
+
+    # Use mode KerasExportSerializationFormat.TFLITE for tflite model and quantization_format.INT8.
+    mct.exporter.keras_export_model(model=quantized_exportable_model,
+                                    save_model_path=tflite_file_path,
+                                    serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE,
+                                    quantization_format=mct.exporter.QuantizationFormat.INT8)
+
+Compare size of float and quantized model:
+
+
+.. 
code-block:: python + + import os + + # Save float model to measure its size + _, float_file_path = tempfile.mkstemp('.keras') + float_model.save(float_file_path) + + print("Float model in Mb:", os.path.getsize(float_file_path) / float(2 ** 20)) + print("Quantized model in Mb:", os.path.getsize(tflite_file_path) / float(2 ** 20)) + print(f'Compression ratio: {os.path.getsize(float_file_path) / os.path.getsize(tflite_file_path)}') + + ++++++++++++++++++++++++ +Fakely-Quantized TFLite ++++++++++++++++++++++++ + +The model will be exported as a tflite model where weights and activations are quantized but represented with a float data type. + ++++++++++++++ +Usage Example ++++++++++++++ + + + +.. code-block:: python + + # Path of exported model + _, tflite_file_path = tempfile.mkstemp('.tflite') + + # Use mode KerasExportSerializationFormat.TFLITE for tflite model and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights + # and activations. + mct.exporter.keras_export_model(model=quantized_exportable_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) + + + + + +Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are +float. + + + + + + PytorchExportSerializationFormat ================================== Select the serialization format for exporting a quantized Pytorch model. @@ -32,11 +185,7 @@ Select the serialization format for exporting a quantized Pytorch model. .. autoclass:: model_compression_toolkit.exporter.PytorchExportSerializationFormat -keras_export_model -========================== -Allows to export a Keras model that was quantized via MCT. -.. autoclass:: model_compression_toolkit.exporter.keras_export_model pytorch_export_model @@ -85,14 +234,17 @@ Now, let's start the export demonstration by quantizing the model using MCT: - -### ONNX ++++++++++ +ONNX ++++++++++ The model will be exported in ONNX format where weights and activations are represented as float. Notice that `onnx` should be installed in order to export the model to an ONNX model. There are two optional formats to choose: MCTQ or FAKELY_QUANT. -#### MCTQ Quantization Format ++++++++++++++++++++++++++++ +MCTQ Quantization Format ++++++++++++++++++++++++++++ By default, `mct.exporter.pytorch_export_model` will export the quantized pytorch model to an ONNX model with custom quantizers from mct_quantizers module. @@ -111,7 +263,9 @@ an ONNX model with custom quantizers from mct_quantizers module. Notice that the model has the same size as the quantized exportable model as weights data types are float. -#### ONNX opset version ++++++++++++++++++++++++++++ +ONNX opset version ++++++++++++++++++++++++++++ By default, the used ONNX opset version is 15, but this can be changed using `onnx_opset_version`: @@ -123,7 +277,11 @@ By default, the used ONNX opset version is 15, but this can be changed using `on repr_dataset=representative_data_gen, onnx_opset_version=16) -### Use exported model for inference +| + +++++++++++++++++++++++++++++++++++++ +Use exported model for inference +++++++++++++++++++++++++++++++++++++ To load and infer using the exported model, which was exported to an ONNX file in MCTQ format, we will use `mct_quantizers` method `get_ort_session_options` during onnxruntime session creation. **Notice**, inference on models that are exported in this format are slowly and suffers from longer latency. 
However, inference of these models on IMX500 will not suffer from this issue. @@ -143,7 +301,11 @@ To load and infer using the exported model, which was exported to an ONNX file i # Run inference predictions = sess.run([_model_output_name], {_model_input_name: _input_data}) -#### Fakely-Quantized +| + ++++++++++++++++++++++++++++++ +Fakely-Quantized in Pytorch ++++++++++++++++++++++++++++++ To export a fakely-quantized model, use QuantizationFormat.FAKELY_QUANT: @@ -164,7 +326,9 @@ To export a fakely-quantized model, use QuantizationFormat.FAKELY_QUANT: Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are float. -### TorchScript +++++++++++++++++++ +TorchScript +++++++++++++++++++ The model will be exported in TorchScript format where weights and activations are quantized but represented as float (fakely quant). @@ -187,7 +351,3 @@ Notice that the fakely-quantized model has the same size as the quantized export float. - - - - From 7b1d1080629c4b23b34ad2835bb45f73ae0d777f Mon Sep 17 00:00:00 2001 From: reuvenp Date: Sat, 9 Mar 2024 20:40:17 +0200 Subject: [PATCH 9/9] auto generated --- .../api/api_docs/classes/DefaultDict.rst.txt | 11 + .../classes/FolderImageLoader.rst.txt | 20 + .../api_docs/classes/FrameworkInfo.rst.txt | 23 + .../classes/GradientPTQConfig.rst.txt | 24 + .../api_docs/classes/PruningConfig.rst.txt | 9 + .../api/api_docs/classes/PruningInfo.rst.txt | 10 + docs/_sources/api/api_docs/index.rst.txt | 50 +- .../get_keras_data_generation_config.rst.txt | 10 + .../methods/get_keras_gptq_config.rst.txt | 10 + ...get_pytorch_data_generation_config.rst.txt | 10 + .../methods/get_pytroch_gptq_config.rst.txt | 10 + .../get_target_platform_capabilities.rst.txt | 13 + ...keras_data_generation_experimental.rst.txt | 10 + ...training_quantization_experimental.rst.txt | 10 + .../keras_kpi_data_experimental.rst.txt | 10 + .../keras_load_quantizad_model.rst.txt | 10 + ...training_quantization_experimental.rst.txt | 10 + .../keras_pruning_experimental.rst.txt | 12 + ...antization_aware_training_finalize.rst.txt | 10 + ...s_quantization_aware_training_init.rst.txt | 10 + ...torch_data_generation_experimental.rst.txt | 10 + ...training_quantization_experimental.rst.txt | 10 + .../pytorch_kpi_data_experimental.rst.txt | 10 + ...training_quantization_experimental.rst.txt | 10 + .../pytorch_pruning_experimental.rst.txt | 15 + ...antization_aware_training_finalize.rst.txt | 10 + ...h_quantization_aware_training_init.rst.txt | 10 + .../api_docs/methods/set_logger_path.rst.txt | 10 + .../api/api_docs/modules/core_config.rst.txt | 16 + .../api/api_docs/modules/debug_config.rst.txt | 17 + .../api/api_docs/modules/exporter.rst.txt | 353 ++++++++++++++ .../api_docs/modules/layer_filters.rst.txt | 40 ++ ...ixed_precision_quantization_config.rst.txt | 15 +- .../api_docs/modules/network_editor.rst.txt | 62 +++ .../api/api_docs/modules/qat_config.rst.txt | 26 + .../modules/quantization_config.rst.txt | 25 + .../api_docs/modules/target_platform.rst.txt | 357 ++++++++++++++ .../modules/trainable_infrastructure.rst.txt | 83 ++++ .../api/api_docs/notes/tpc_note.rst.txt | 8 + docs/_sources/index.rst.txt | 28 +- docs/api/api_docs/classes/DefaultDict.html | 44 +- .../api_docs/classes/FolderImageLoader.html | 43 +- docs/api/api_docs/classes/FrameworkInfo.html | 34 +- .../api_docs/classes/GradientPTQConfig.html | 67 ++- docs/api/api_docs/classes/PruningConfig.html | 124 +++++ docs/api/api_docs/classes/PruningInfo.html | 117 +++++ 
docs/api/api_docs/index.html | 62 ++- .../get_keras_data_generation_config.html | 122 +++++ .../methods/get_keras_gptq_config.html | 57 ++- .../get_pytorch_data_generation_config.html | 123 +++++ .../methods/get_pytroch_gptq_config.html | 123 +++++ .../get_target_platform_capabilities.html | 46 +- .../keras_data_generation_experimental.html | 109 +++++ ...st_training_quantization_experimental.html | 165 +++++++ .../methods/keras_kpi_data_experimental.html | 126 +++++ .../methods/keras_load_quantizad_model.html | 101 ++++ ...st_training_quantization_experimental.html | 156 ++++++ .../methods/keras_pruning_experimental.html | 154 ++++++ ..._quantization_aware_training_finalize.html | 144 ++++++ ...eras_quantization_aware_training_init.html | 165 +++++++ .../pytorch_data_generation_experimental.html | 109 +++++ ...st_training_quantization_experimental.html | 146 ++++++ .../pytorch_kpi_data_experimental.html | 126 +++++ ...st_training_quantization_experimental.html | 135 ++++++ .../methods/pytorch_pruning_experimental.html | 158 ++++++ ..._quantization_aware_training_finalize.html | 127 +++++ ...orch_quantization_aware_training_init.html | 152 ++++++ .../api/api_docs/methods/set_logger_path.html | 25 +- docs/api/api_docs/modules/core_config.html | 113 +++++ docs/api/api_docs/modules/debug_config.html | 113 +++++ docs/api/api_docs/modules/exporter.html | 455 ++++++++++++++++++ docs/api/api_docs/modules/layer_filters.html | 33 +- .../mixed_precision_quantization_config.html | 27 +- docs/api/api_docs/modules/network_editor.html | 124 ++--- docs/api/api_docs/modules/qat_config.html | 131 +++++ .../api_docs/modules/quantization_config.html | 48 +- .../api/api_docs/modules/target_platform.html | 374 +++++++------- .../modules/trainable_infrastructure.html | 216 +++++++++ docs/api/api_docs/notes/tpc_note.html | 27 +- docs/genindex.html | 184 ++++--- docs/guidelines/quickstart_pytorch.html | 8 +- docs/guidelines/visualization.html | 4 +- docs/index.html | 28 +- docs/objects.inv | Bin 5533 -> 5385 bytes docs/searchindex.js | 2 +- 85 files changed, 5732 insertions(+), 612 deletions(-) create mode 100644 docs/_sources/api/api_docs/classes/DefaultDict.rst.txt create mode 100644 docs/_sources/api/api_docs/classes/FolderImageLoader.rst.txt create mode 100644 docs/_sources/api/api_docs/classes/FrameworkInfo.rst.txt create mode 100644 docs/_sources/api/api_docs/classes/GradientPTQConfig.rst.txt create mode 100644 docs/_sources/api/api_docs/classes/PruningConfig.rst.txt create mode 100644 docs/_sources/api/api_docs/classes/PruningInfo.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/get_keras_data_generation_config.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/get_keras_gptq_config.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/get_pytorch_data_generation_config.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/get_pytroch_gptq_config.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/get_target_platform_capabilities.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/keras_data_generation_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/keras_kpi_data_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/keras_load_quantizad_model.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/keras_post_training_quantization_experimental.rst.txt create mode 100644 
docs/_sources/api/api_docs/methods/keras_pruning_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/keras_quantization_aware_training_finalize.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/keras_quantization_aware_training_init.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/pytorch_data_generation_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/pytorch_kpi_data_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/pytorch_post_training_quantization_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/pytorch_pruning_experimental.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/pytorch_quantization_aware_training_finalize.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/pytorch_quantization_aware_training_init.rst.txt create mode 100644 docs/_sources/api/api_docs/methods/set_logger_path.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/core_config.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/debug_config.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/exporter.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/layer_filters.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/network_editor.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/qat_config.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/quantization_config.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/target_platform.rst.txt create mode 100644 docs/_sources/api/api_docs/modules/trainable_infrastructure.rst.txt create mode 100644 docs/_sources/api/api_docs/notes/tpc_note.rst.txt create mode 100644 docs/api/api_docs/classes/PruningConfig.html create mode 100644 docs/api/api_docs/classes/PruningInfo.html create mode 100644 docs/api/api_docs/methods/get_keras_data_generation_config.html create mode 100644 docs/api/api_docs/methods/get_pytorch_data_generation_config.html create mode 100644 docs/api/api_docs/methods/get_pytroch_gptq_config.html create mode 100644 docs/api/api_docs/methods/keras_data_generation_experimental.html create mode 100644 docs/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.html create mode 100644 docs/api/api_docs/methods/keras_kpi_data_experimental.html create mode 100644 docs/api/api_docs/methods/keras_load_quantizad_model.html create mode 100644 docs/api/api_docs/methods/keras_post_training_quantization_experimental.html create mode 100644 docs/api/api_docs/methods/keras_pruning_experimental.html create mode 100644 docs/api/api_docs/methods/keras_quantization_aware_training_finalize.html create mode 100644 docs/api/api_docs/methods/keras_quantization_aware_training_init.html create mode 100644 docs/api/api_docs/methods/pytorch_data_generation_experimental.html create mode 100644 docs/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.html create mode 100644 docs/api/api_docs/methods/pytorch_kpi_data_experimental.html create mode 100644 docs/api/api_docs/methods/pytorch_post_training_quantization_experimental.html create mode 100644 docs/api/api_docs/methods/pytorch_pruning_experimental.html create mode 100644 docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize.html create mode 100644 docs/api/api_docs/methods/pytorch_quantization_aware_training_init.html create mode 
100644 docs/api/api_docs/modules/core_config.html create mode 100644 docs/api/api_docs/modules/debug_config.html create mode 100644 docs/api/api_docs/modules/exporter.html create mode 100644 docs/api/api_docs/modules/qat_config.html create mode 100644 docs/api/api_docs/modules/trainable_infrastructure.html diff --git a/docs/_sources/api/api_docs/classes/DefaultDict.rst.txt b/docs/_sources/api/api_docs/classes/DefaultDict.rst.txt new file mode 100644 index 000000000..3d62995b3 --- /dev/null +++ b/docs/_sources/api/api_docs/classes/DefaultDict.rst.txt @@ -0,0 +1,11 @@ +:orphan: + +.. _ug-DefaultDict: + + +================================= +DefaultDict Class +================================= + +.. autoclass:: model_compression_toolkit.DefaultDict + :members: diff --git a/docs/_sources/api/api_docs/classes/FolderImageLoader.rst.txt b/docs/_sources/api/api_docs/classes/FolderImageLoader.rst.txt new file mode 100644 index 000000000..6897e353f --- /dev/null +++ b/docs/_sources/api/api_docs/classes/FolderImageLoader.rst.txt @@ -0,0 +1,20 @@ +:orphan: + +.. _ug-FolderImageLoader: + +=============================== +Folder Image Loader API +=============================== + +**The following API can be used to load a folder of images to create a representative dataset for PTQ calibration** + +.. autoclass:: model_compression_toolkit.core.FolderImageLoader + :members: + + +================================= +Default file types to scan +================================= + +.. autodata:: model_compression_toolkit.core.common.data_loader.FILETYPES + diff --git a/docs/_sources/api/api_docs/classes/FrameworkInfo.rst.txt b/docs/_sources/api/api_docs/classes/FrameworkInfo.rst.txt new file mode 100644 index 000000000..2338a7dd4 --- /dev/null +++ b/docs/_sources/api/api_docs/classes/FrameworkInfo.rst.txt @@ -0,0 +1,23 @@ +:orphan: + +.. _ug-FrameworkInfo: + + +================================= +FrameworkInfo Class +================================= + +**The following API can be used to pass MCT framework-related information to use when optimizing the network** + +.. autoclass:: model_compression_toolkit.core.FrameworkInfo + + +| + + +ChannelAxis +========================== +**Enum to select the output channels format in the model:** + +.. autoclass:: model_compression_toolkit.core.ChannelAxis + diff --git a/docs/_sources/api/api_docs/classes/GradientPTQConfig.rst.txt b/docs/_sources/api/api_docs/classes/GradientPTQConfig.rst.txt new file mode 100644 index 000000000..11a79e1c8 --- /dev/null +++ b/docs/_sources/api/api_docs/classes/GradientPTQConfig.rst.txt @@ -0,0 +1,24 @@ +:orphan: + +.. _ug-GradientPTQConfig: + + +================================= +GradientPTQConfig Class +================================= + + +**The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float Keras model) to a student (the quantized Keras model)** + +.. autoclass:: model_compression_toolkit.gptq.GradientPTQConfig + :members: + +================================= +GPTQHessianScoresConfig Class +================================= + + +**The following API can be used to create a GPTQHessianScoresConfig instance which can be used to define necessary parameters for computing Hessian scores for the GPTQ loss function.** + +.. 
autoclass:: model_compression_toolkit.gptq.GPTQHessianScoresConfig + :members: diff --git a/docs/_sources/api/api_docs/classes/PruningConfig.rst.txt b/docs/_sources/api/api_docs/classes/PruningConfig.rst.txt new file mode 100644 index 000000000..0b57b743a --- /dev/null +++ b/docs/_sources/api/api_docs/classes/PruningConfig.rst.txt @@ -0,0 +1,9 @@ +:orphan: + +.. _ug-PruningConfig: + +================================================ +Pruning Configuration +================================================ + +.. autofunction:: model_compression_toolkit.pruning.PruningConfig \ No newline at end of file diff --git a/docs/_sources/api/api_docs/classes/PruningInfo.rst.txt b/docs/_sources/api/api_docs/classes/PruningInfo.rst.txt new file mode 100644 index 000000000..7bd02336c --- /dev/null +++ b/docs/_sources/api/api_docs/classes/PruningInfo.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-PruningInfo: + +================================================ +Pruning Information +================================================ + +.. autofunction:: model_compression_toolkit.pruning.PruningInfo + diff --git a/docs/_sources/api/api_docs/index.rst.txt b/docs/_sources/api/api_docs/index.rst.txt index 28f078466..3b8eec621 100644 --- a/docs/_sources/api/api_docs/index.rst.txt +++ b/docs/_sources/api/api_docs/index.rst.txt @@ -7,8 +7,6 @@ API Docs ========= -.. note:: This API will be removed in future releases. Please switch to the :ref:`new API` - **Init module for MCT API.** .. code-block:: python @@ -17,30 +15,56 @@ API Docs | + Functions ========= -- :ref:`pytorch_post_training_quantization`: Function to use for post training quantization of Pytorch models. -- :ref:`pytorch_post_training_quantization_mixed_precision`: Function to use for mixed-precision post training quantization of Pytorch models (experimental). -- :ref:`keras_post_training_quantization`: Function to use for post training quantization of Keras models. -- :ref:`keras_post_training_quantization_mixed_precision`: Function to use for mixed-precision post training quantization of Keras models (experimental). -- :ref:`get_keras_gptq_config`: Function to create a GradientPTQConfig instance to use for Keras models when using GPTQ (experimental). -- :ref:`get_target_platform_capabilities`: Function to get a target platform model for Tensorflow and Pytorch. -- :ref:`keras_kpi_data`: Function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of Keras models. -- :ref:`pytorch_kpi_data`: Function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of PyTorch models. +- :ref:`pytorch_post_training_quantization`: A function to use for post training quantization of PyTorch models. +- :ref:`keras_post_training_quantization`: A function to use for post training quantization of Keras models. + +- :ref:`keras_gradient_post_training_quantization`: A function to use for gradient-based post training quantization of Keras models. +- :ref:`get_keras_gptq_config`: A function to create a GradientPTQConfig instance to use for Keras models when using GPTQ. + +- :ref:`pytorch_gradient_post_training_quantization`: A function to use for gradient-based post training quantization of Pytorch models. +- :ref:`get_pytorch_gptq_config`: A function to create a GradientPTQConfig instance to use for Pytorch models when using GPTQ. 
+ +- :ref:`keras_quantization_aware_training_init`: A function to use for preparing a model for Quantization Aware Training (Experimental) +- :ref:`keras_quantization_aware_training_finalize`: A function to finalize a model after Quantization Aware Training to a model without QuantizeWrappers (Experimental) + +- :ref:`keras_data_generation_experimental`: A function to generate data for a Keras model (experimental). +- :ref:`get_keras_data_generation_config`: A function to generate a DataGenerationConfig for Tensorflow data generation(experimental). + +- :ref:`pytorch_data_generation_experimental`: A function to generate data for a Pytorch model (experimental). +- :ref:`get_pytorch_data_generation_config`: A function to load a DataGenerationConfig for Pytorch data generation (experimental). + +- :ref:`keras_pruning_experimental`: A function to apply structured pruning for Keras models (experimental). +- :ref:`pytorch_pruning_experimental`: A function to apply structured pruning for Pytorch models (experimental). + +- :ref:`keras_kpi_data`: A function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of Keras models. +- :ref:`pytorch_kpi_data`: A function to compute KPI data that can be used to calculate the desired target KPI for mixed-precision quantization of PyTorch models. + +- :ref:`get_target_platform_capabilities`: A function to get a target platform model for Tensorflow and Pytorch. +- :ref:`keras_load_quantized_model`: A function to load a quantized keras model. Modules ========= +- :ref:`core_config`: Module to contain configurations of the optimization process. - :ref:`quantization_config`: Module to configure the quantization process. -- :ref:`mixed_precision_quantization_config`: Module to configure the quantization process when using mixed-precision PTQ. -- :ref:`network_editor`: Module to edit your model during the quantization process. +- :ref:`mixed_precision_quantization_config`: Module to configure the quantization process when using mixed-precision PTQ. +- :ref:`debug_config`: Module to configure options for debugging the optimization process. - :ref:`target_platform`: Module to create and model hardware-related settings to optimize the model according to, by the hardware the optimized model will use during inference. +- :ref:`qat_config`: Module to create quantization configuration for Quantization-aware Training. +- :ref:`exporter`: Module that enables to export a quantized model in different serialization formats. +- :ref:`trainable_infrastructure`: Module that contains quantization abstraction and quantizers for hardware-oriented model optimization tools. Classes ========= -- :ref:`GradientPTQConfig`: Class to configure GradientPTQC options for gradient based post training quantization. +- :ref:`GradientPTQConfig`: Class to configure GradientPTQ options for gradient based post training quantization. - :ref:`FolderImageLoader`: Class to use an images directory as a representative dataset. - :ref:`FrameworkInfo`: Class to wrap framework information to be used by MCT when optimizing models. 
+- :ref:`PruningConfig`: PruningConfig +- :ref:`PruningInfo`: PruningInfo + Indices and tables ================== diff --git a/docs/_sources/api/api_docs/methods/get_keras_data_generation_config.rst.txt b/docs/_sources/api/api_docs/methods/get_keras_data_generation_config.rst.txt new file mode 100644 index 000000000..33efd7cb7 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/get_keras_data_generation_config.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-get_keras_data_generation_config: + + +=========================================== +Get DataGenerationConfig for Keras Models +=========================================== + +.. autofunction:: model_compression_toolkit.data_generation.get_keras_data_generation_config diff --git a/docs/_sources/api/api_docs/methods/get_keras_gptq_config.rst.txt b/docs/_sources/api/api_docs/methods/get_keras_gptq_config.rst.txt new file mode 100644 index 000000000..3f64adea5 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/get_keras_gptq_config.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-get_keras_gptq_config: + + +======================================= +Get GradientPTQConfig for Keras Models +======================================= + +.. autofunction:: model_compression_toolkit.gptq.get_keras_gptq_config diff --git a/docs/_sources/api/api_docs/methods/get_pytorch_data_generation_config.rst.txt b/docs/_sources/api/api_docs/methods/get_pytorch_data_generation_config.rst.txt new file mode 100644 index 000000000..100e669dc --- /dev/null +++ b/docs/_sources/api/api_docs/methods/get_pytorch_data_generation_config.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-get_pytorch_data_generation_config: + + +=========================================== +Get DataGenerationConfig for Pytorch Models +=========================================== + +.. autofunction:: model_compression_toolkit.data_generation.get_pytorch_data_generation_config diff --git a/docs/_sources/api/api_docs/methods/get_pytroch_gptq_config.rst.txt b/docs/_sources/api/api_docs/methods/get_pytroch_gptq_config.rst.txt new file mode 100644 index 000000000..990c72e2c --- /dev/null +++ b/docs/_sources/api/api_docs/methods/get_pytroch_gptq_config.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-get_pytorch_gptq_config: + + +============================================ +Get GradientPTQConfig for Pytorch Models +============================================ + +.. autofunction:: model_compression_toolkit.gptq.get_pytorch_gptq_config diff --git a/docs/_sources/api/api_docs/methods/get_target_platform_capabilities.rst.txt b/docs/_sources/api/api_docs/methods/get_target_platform_capabilities.rst.txt new file mode 100644 index 000000000..cc623b66a --- /dev/null +++ b/docs/_sources/api/api_docs/methods/get_target_platform_capabilities.rst.txt @@ -0,0 +1,13 @@ +:orphan: + +.. _ug-get_target_platform_capabilities: + + +======================================= +Get TargetPlatformCapabilities +======================================= + +.. autofunction:: model_compression_toolkit.get_target_platform_capabilities + + +.. include:: ../notes/tpc_note.rst \ No newline at end of file diff --git a/docs/_sources/api/api_docs/methods/keras_data_generation_experimental.rst.txt b/docs/_sources/api/api_docs/methods/keras_data_generation_experimental.rst.txt new file mode 100644 index 000000000..66dfa8dcf --- /dev/null +++ b/docs/_sources/api/api_docs/methods/keras_data_generation_experimental.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. 
_ug-keras_data_generation_experimental: + + +===================================== +Keras Data Generation +===================================== + +.. autofunction:: model_compression_toolkit.data_generation.keras_data_generation_experimental diff --git a/docs/_sources/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.rst.txt b/docs/_sources/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.rst.txt new file mode 100644 index 000000000..afd2eabf6 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-keras_gradient_post_training_quantization: + + +================================================ +Keras Gradient Based Post Training Quantization +================================================ + +.. autofunction:: model_compression_toolkit.gptq.keras_gradient_post_training_quantization diff --git a/docs/_sources/api/api_docs/methods/keras_kpi_data_experimental.rst.txt b/docs/_sources/api/api_docs/methods/keras_kpi_data_experimental.rst.txt new file mode 100644 index 000000000..a0205814f --- /dev/null +++ b/docs/_sources/api/api_docs/methods/keras_kpi_data_experimental.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-keras_kpi_data: + + +======================================= +Get KPI information for Keras Models +======================================= + +.. autofunction:: model_compression_toolkit.core.keras_kpi_data diff --git a/docs/_sources/api/api_docs/methods/keras_load_quantizad_model.rst.txt b/docs/_sources/api/api_docs/methods/keras_load_quantizad_model.rst.txt new file mode 100644 index 000000000..d3d5d89b0 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/keras_load_quantizad_model.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-keras_load_quantized_model: + + +======================================= +Load Quantized Keras Model +======================================= + +.. autofunction:: model_compression_toolkit.keras_load_quantized_model diff --git a/docs/_sources/api/api_docs/methods/keras_post_training_quantization_experimental.rst.txt b/docs/_sources/api/api_docs/methods/keras_post_training_quantization_experimental.rst.txt new file mode 100644 index 000000000..82a351716 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/keras_post_training_quantization_experimental.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-keras_post_training_quantization: + + +================================= +Keras Post Training Quantization +================================= + +.. autofunction:: model_compression_toolkit.ptq.keras_post_training_quantization diff --git a/docs/_sources/api/api_docs/methods/keras_pruning_experimental.rst.txt b/docs/_sources/api/api_docs/methods/keras_pruning_experimental.rst.txt new file mode 100644 index 000000000..508a11e47 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/keras_pruning_experimental.rst.txt @@ -0,0 +1,12 @@ +:orphan: + +.. _ug-keras_pruning_experimental: + + +================================================ +Keras Structured Pruning +================================================ + +.. 
autofunction:: model_compression_toolkit.pruning.keras_pruning_experimental + + diff --git a/docs/_sources/api/api_docs/methods/keras_quantization_aware_training_finalize.rst.txt b/docs/_sources/api/api_docs/methods/keras_quantization_aware_training_finalize.rst.txt new file mode 100644 index 000000000..54ff74a3f --- /dev/null +++ b/docs/_sources/api/api_docs/methods/keras_quantization_aware_training_finalize.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-keras_quantization_aware_training_finalize_experimental: + + +================================================ +Keras Quantization Aware Training Model Finalize +================================================ + +.. autofunction:: model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental diff --git a/docs/_sources/api/api_docs/methods/keras_quantization_aware_training_init.rst.txt b/docs/_sources/api/api_docs/methods/keras_quantization_aware_training_init.rst.txt new file mode 100644 index 000000000..b89c3367e --- /dev/null +++ b/docs/_sources/api/api_docs/methods/keras_quantization_aware_training_init.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-keras_quantization_aware_training_init_experimental: + + +================================================ +Keras Quantization Aware Training Model Init +================================================ + +.. autofunction:: model_compression_toolkit.qat.keras_quantization_aware_training_init_experimental diff --git a/docs/_sources/api/api_docs/methods/pytorch_data_generation_experimental.rst.txt b/docs/_sources/api/api_docs/methods/pytorch_data_generation_experimental.rst.txt new file mode 100644 index 000000000..0679417ec --- /dev/null +++ b/docs/_sources/api/api_docs/methods/pytorch_data_generation_experimental.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-pytorch_data_generation_experimental: + + +===================================== +Pytorch Data Generation +===================================== + +.. autofunction:: model_compression_toolkit.data_generation.pytorch_data_generation_experimental diff --git a/docs/_sources/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst.txt b/docs/_sources/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst.txt new file mode 100644 index 000000000..95dcf10f0 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-pytorch_gradient_post_training_quantization: + + +==================================================== +Pytorch Gradient Based Post Training Quantization +==================================================== + +.. autofunction:: model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization diff --git a/docs/_sources/api/api_docs/methods/pytorch_kpi_data_experimental.rst.txt b/docs/_sources/api/api_docs/methods/pytorch_kpi_data_experimental.rst.txt new file mode 100644 index 000000000..86f512eae --- /dev/null +++ b/docs/_sources/api/api_docs/methods/pytorch_kpi_data_experimental.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-pytorch_kpi_data: + + +======================================= +Get KPI information for PyTorch Models +======================================= + +.. 
autofunction:: model_compression_toolkit.core.pytorch_kpi_data diff --git a/docs/_sources/api/api_docs/methods/pytorch_post_training_quantization_experimental.rst.txt b/docs/_sources/api/api_docs/methods/pytorch_post_training_quantization_experimental.rst.txt new file mode 100644 index 000000000..46feca7ba --- /dev/null +++ b/docs/_sources/api/api_docs/methods/pytorch_post_training_quantization_experimental.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-pytorch_post_training_quantization: + + +===================================== +Pytorch Post Training Quantization +===================================== + +.. autofunction:: model_compression_toolkit.ptq.pytorch_post_training_quantization diff --git a/docs/_sources/api/api_docs/methods/pytorch_pruning_experimental.rst.txt b/docs/_sources/api/api_docs/methods/pytorch_pruning_experimental.rst.txt new file mode 100644 index 000000000..4f050cfe4 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/pytorch_pruning_experimental.rst.txt @@ -0,0 +1,15 @@ +:orphan: + +.. _ug-pytorch_pruning_experimental: + + +================================================ +Pytorch Structured Pruning +================================================ + +.. autofunction:: model_compression_toolkit.pruning.pytorch_pruning_experimental + + + + + diff --git a/docs/_sources/api/api_docs/methods/pytorch_quantization_aware_training_finalize.rst.txt b/docs/_sources/api/api_docs/methods/pytorch_quantization_aware_training_finalize.rst.txt new file mode 100644 index 000000000..e391be8d7 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/pytorch_quantization_aware_training_finalize.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-pytorch_quantization_aware_training_finalize_experimental: + + +=================================================== +PyTorch Quantization Aware Training Model Finalize +=================================================== + +.. autofunction:: model_compression_toolkit.qat.pytorch_quantization_aware_training_finalize_experimental diff --git a/docs/_sources/api/api_docs/methods/pytorch_quantization_aware_training_init.rst.txt b/docs/_sources/api/api_docs/methods/pytorch_quantization_aware_training_init.rst.txt new file mode 100644 index 000000000..4e14c7478 --- /dev/null +++ b/docs/_sources/api/api_docs/methods/pytorch_quantization_aware_training_init.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-pytorch_quantization_aware_training_init_experimental: + + +================================================ +PyTorch Quantization Aware Training Model Init +================================================ + +.. autofunction:: model_compression_toolkit.qat.pytorch_quantization_aware_training_init_experimental diff --git a/docs/_sources/api/api_docs/methods/set_logger_path.rst.txt b/docs/_sources/api/api_docs/methods/set_logger_path.rst.txt new file mode 100644 index 000000000..d449869aa --- /dev/null +++ b/docs/_sources/api/api_docs/methods/set_logger_path.rst.txt @@ -0,0 +1,10 @@ +:orphan: + +.. _ug-set_logger_path: + +================================= +Enable a Logger +================================= + +.. autofunction:: model_compression_toolkit.set_log_folder + diff --git a/docs/_sources/api/api_docs/modules/core_config.rst.txt b/docs/_sources/api/api_docs/modules/core_config.rst.txt new file mode 100644 index 000000000..5601ffabd --- /dev/null +++ b/docs/_sources/api/api_docs/modules/core_config.rst.txt @@ -0,0 +1,16 @@ +:orphan: + +.. 
_ug-core_config: + + +================================= +core_config Module +================================= + + +CoreConfig +========================== +**Class to configure the optimization process of the model:** + +.. autoclass:: model_compression_toolkit.core.CoreConfig + diff --git a/docs/_sources/api/api_docs/modules/debug_config.rst.txt b/docs/_sources/api/api_docs/modules/debug_config.rst.txt new file mode 100644 index 000000000..16654f433 --- /dev/null +++ b/docs/_sources/api/api_docs/modules/debug_config.rst.txt @@ -0,0 +1,17 @@ +:orphan: + +.. _ug-debug_config: + + +================================= +debug_config Module +================================= + + +DebugConfig +========================== +**Class to configure params for debugging purposes:** + +.. autoclass:: model_compression_toolkit.core.DebugConfig + + diff --git a/docs/_sources/api/api_docs/modules/exporter.rst.txt b/docs/_sources/api/api_docs/modules/exporter.rst.txt new file mode 100644 index 000000000..e441eee22 --- /dev/null +++ b/docs/_sources/api/api_docs/modules/exporter.rst.txt @@ -0,0 +1,353 @@ +:orphan: + +.. _ug-exporter: + + +================================= +exporter Module +================================= + +Allows to export a quantized model in different serialization formats and quantization formats. +For more details about the export formats and options, please refer to the project's GitHub `README file `_. +If you have any questions or issues, please open an issue in this GitHub repository. + + +QuantizationFormat +========================== + +.. autoclass:: model_compression_toolkit.exporter.QuantizationFormat + + +KerasExportSerializationFormat +================================ +Select the serialization format for exporting a quantized Keras model. + +.. autoclass:: model_compression_toolkit.exporter.KerasExportSerializationFormat + + +keras_export_model +========================== +Allows to export a Keras model that was quantized via MCT. + +.. autoclass:: model_compression_toolkit.exporter.keras_export_model + + +Keras Tutorial +========================== +To export a TensorFlow model as a quantized model, it is necessary to first apply quantization +to the model using MCT: + +.. code-block:: shell + + ! pip install -q mct-nightly + +.. code-block:: python + + import numpy as np + from keras.applications import ResNet50 + import model_compression_toolkit as mct + + # Create a model + float_model = ResNet50() + # Quantize the model. + # Notice that here the representative dataset is random for demonstration only. + quantized_exportable_model, _ = mct.ptq.keras_post_training_quantization(float_model, + representative_data_gen=lambda: [np.random.random((1, 224, 224, 3))]) + + + +++++++++++++++++++++++++++++ +keras serialization format +++++++++++++++++++++++++++++ +The model will be exported as a tensorflow `.keras` model where weights and activations are quantized but represented using a float32 dtype. +Two optional quantization formats are available: MCTQ and FAKELY_QUANT. + +++++ +MCTQ +++++ + +By default, `mct.exporter.keras_export_model` will export the quantized Keras model to +a .keras model with custom quantizers from mct_quantizers module. + +.. code-block:: python + + import tempfile + + # Path of exported model + _, keras_file_path = tempfile.mkstemp('.keras') + + # Export a keras model with mctq custom quantizers. 
+ mct.exporter.keras_export_model(model=quantized_exportable_model, + save_model_path=keras_file_path) + +Notice that the model has the same size as the quantized exportable model as weights data types are float. + ++++++++++++++++++++++++++++ +Fakely-Quantized in Keras ++++++++++++++++++++++++++++ + +.. code-block:: python + + # Path of exported model + _, keras_file_path = tempfile.mkstemp('.keras') + + # Use mode KerasExportSerializationFormat.KERAS for a .keras model + # and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights + # and activations. + mct.exporter.keras_export_model(model=quantized_exportable_model, + save_model_path=keras_file_path, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) + +Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are +float. + +++++++ +TFLite +++++++ + +The tflite serialization format export in two qauntization formats: INT8 and FAKELY_QUANT. + ++++++++++++ +INT8 TFLite ++++++++++++ + +The model will be exported as a tflite model where weights and activations are represented as 8bit integers. + +.. code-block:: python + + import tempfile + + # Path of exported model + _, tflite_file_path = tempfile.mkstemp('.tflite') + + # Use mode KerasExportSerializationFormat.TFLITE for tflite model and quantization_format.INT8. + mct.exporter.keras_export_model(model=quantized_exportable_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.INT8) + +Compare size of float and quantized model: + + +.. code-block:: python + + import os + + # Save float model to measure its size + _, float_file_path = tempfile.mkstemp('.keras') + float_model.save(float_file_path) + + print("Float model in Mb:", os.path.getsize(float_file_path) / float(2 ** 20)) + print("Quantized model in Mb:", os.path.getsize(tflite_file_path) / float(2 ** 20)) + print(f'Compression ratio: {os.path.getsize(float_file_path) / os.path.getsize(tflite_file_path)}') + + ++++++++++++++++++++++++ +Fakely-Quantized TFLite ++++++++++++++++++++++++ + +The model will be exported as a tflite model where weights and activations are quantized but represented with a float data type. + ++++++++++++++ +Usage Example ++++++++++++++ + + + +.. code-block:: python + + # Path of exported model + _, tflite_file_path = tempfile.mkstemp('.tflite') + + # Use mode KerasExportSerializationFormat.TFLITE for tflite model and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights + # and activations. + mct.exporter.keras_export_model(model=quantized_exportable_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) + + + + + +Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are +float. + + + + + + +PytorchExportSerializationFormat +================================== +Select the serialization format for exporting a quantized Pytorch model. + +.. autoclass:: model_compression_toolkit.exporter.PytorchExportSerializationFormat + + + + + +pytorch_export_model +========================== +Allows to export a Pytorch model that was quantized via MCT. + +.. 
autoclass:: model_compression_toolkit.exporter.pytorch_export_model + + + +Pytorch Tutorial +========================== + +To export a Pytorch model as a quantized model, it is necessary to first apply quantization +to the model using MCT: + +.. code-block:: shell + + ! pip install -q mct-nightly + +In order to export your quantized model to ONNX format, and use it for inference, some additional packages are needed. Notice, this is needed only for models exported to ONNX format, so this part can be skipped if this is not planned: + +.. code-block:: shell + + ! pip install -q onnx onnxruntime onnxruntime-extensions + +Now, let's start the export demonstration by quantizing the model using MCT: + +.. code-block:: python + + import model_compression_toolkit as mct + import numpy as np + import torch + from torchvision.models.mobilenetv2 import mobilenet_v2 + + # Create a model + float_model = mobilenet_v2() + + + # Notice that here the representative dataset is random for demonstration only. + def representative_data_gen(): + yield [np.random.random((1, 3, 224, 224))] + + + quantized_exportable_model, _ = mct.ptq.pytorch_post_training_quantization(float_model, representative_data_gen=representative_data_gen) + + + ++++++++++ +ONNX ++++++++++ + +The model will be exported in ONNX format where weights and activations are represented as float. Notice that `onnx` should be installed in order to export the model to an ONNX model. + +There are two optional formats to choose: MCTQ or FAKELY_QUANT. + ++++++++++++++++++++++++++++ +MCTQ Quantization Format ++++++++++++++++++++++++++++ + +By default, `mct.exporter.pytorch_export_model` will export the quantized pytorch model to +an ONNX model with custom quantizers from mct_quantizers module. + + + +.. code-block:: python + + # Path of exported model + onnx_file_path = 'model_format_onnx_mctq.onnx' + + # Export ONNX model with mctq quantizers. + mct.exporter.pytorch_export_model(model=quantized_exportable_model, + save_model_path=onnx_file_path, + repr_dataset=representative_data_gen) + +Notice that the model has the same size as the quantized exportable model as weights data types are float. + ++++++++++++++++++++++++++++ +ONNX opset version ++++++++++++++++++++++++++++ + +By default, the used ONNX opset version is 15, but this can be changed using `onnx_opset_version`: + +.. code-block:: python + + # Export ONNX model with mctq quantizers. + mct.exporter.pytorch_export_model(model=quantized_exportable_model, + save_model_path=onnx_file_path, + repr_dataset=representative_data_gen, + onnx_opset_version=16) + +| + +++++++++++++++++++++++++++++++++++++ +Use exported model for inference +++++++++++++++++++++++++++++++++++++ + +To load and infer using the exported model, which was exported to an ONNX file in MCTQ format, we will use `mct_quantizers` method `get_ort_session_options` during onnxruntime session creation. **Notice**, inference on models that are exported in this format are slowly and suffers from longer latency. However, inference of these models on IMX500 will not suffer from this issue. + +.. 
code-block:: python + + import mct_quantizers as mctq + import onnxruntime as ort + + sess = ort.InferenceSession(onnx_file_path, + mctq.get_ort_session_options(), + providers=['CUDAExecutionProvider', 'CPUExecutionProvider']) + + _input_data = next(representative_data_gen())[0].astype(np.float32) + _model_output_name = sess.get_outputs()[0].name + _model_input_name = sess.get_inputs()[0].name + + # Run inference + predictions = sess.run([_model_output_name], {_model_input_name: _input_data}) + +| + ++++++++++++++++++++++++++++++ +Fakely-Quantized in Pytorch ++++++++++++++++++++++++++++++ + +To export a fakely-quantized model, use QuantizationFormat.FAKELY_QUANT: + +.. code-block:: python + + import tempfile + + # Path of exported model + _, onnx_file_path = tempfile.mkstemp('.onnx') + + # Use QuantizationFormat.FAKELY_QUANT for fakely-quantized weights and activations. + mct.exporter.pytorch_export_model(model=quantized_exportable_model, + save_model_path=onnx_file_path, + repr_dataset=representative_data_gen, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) + + +Notice that the fakely-quantized model has the same size as the quantized +exportable model as weights data types are float. + +++++++++++++++++++ +TorchScript +++++++++++++++++++ + +The model will be exported in TorchScript format where weights and activations are +quantized but represented as float (fakely quant). + +.. code-block:: python + + # Path of exported model + _, torchscript_file_path = tempfile.mkstemp('.pt') + + + # Use mode PytorchExportSerializationFormat.TORCHSCRIPT a torchscript model + # and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights and activations. + mct.exporter.pytorch_export_model(model=quantized_exportable_model, + save_model_path=torchscript_file_path, + repr_dataset=representative_data_gen, + serialization_format=mct.exporter.PytorchExportSerializationFormat.TORCHSCRIPT, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) + +Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are +float. + + diff --git a/docs/_sources/api/api_docs/modules/layer_filters.rst.txt b/docs/_sources/api/api_docs/modules/layer_filters.rst.txt new file mode 100644 index 000000000..fb6af858b --- /dev/null +++ b/docs/_sources/api/api_docs/modules/layer_filters.rst.txt @@ -0,0 +1,40 @@ +:orphan: + +.. _ug-layer_filters: + + +================================= +Layer Attributes Filters +================================= + +In order to create LayerFilterParams which needed to filter layers by their attributes, +one may use the next filters to check if a layer configuration holds the created LayerFilterParams: + + + +Attribute Filters +================== + +.. autoclass:: model_compression_toolkit.target_platform.Eq + +| + +.. autoclass:: model_compression_toolkit.target_platform.NotEq + +| + +.. autoclass:: model_compression_toolkit.target_platform.Greater + +| + + +.. autoclass:: model_compression_toolkit.target_platform.GreaterEq + +| + + +.. autoclass:: model_compression_toolkit.target_platform.Smaller + +| + +.. 
autoclass:: model_compression_toolkit.target_platform.SmallerEq diff --git a/docs/_sources/api/api_docs/modules/mixed_precision_quantization_config.rst.txt b/docs/_sources/api/api_docs/modules/mixed_precision_quantization_config.rst.txt index e48b66381..f3d6aba8c 100644 --- a/docs/_sources/api/api_docs/modules/mixed_precision_quantization_config.rst.txt +++ b/docs/_sources/api/api_docs/modules/mixed_precision_quantization_config.rst.txt @@ -1,27 +1,26 @@ :orphan: -.. _ug-mixed_precision_quantization_config: +.. _ug-mixed_precision_quantization_config_v2: -=========================================== +=========================================================== mixed_precision_quantization_config Module -=========================================== +=========================================================== KPI ================================ **Object to configure resources to use when searching for a mixed-precision configuration for a model:** -.. autoclass:: model_compression_toolkit.KPI - :noindex: +.. autoclass:: model_compression_toolkit.core.KPI | -MixedPrecisionQuantizationConfig -================================= +MixedPrecisionQuantizationConfigV2 +=================================== **Class to configure the quantization process of the model when quantizing in mixed-precision:** -.. autoclass:: model_compression_toolkit.MixedPrecisionQuantizationConfig +.. autoclass:: model_compression_toolkit.core.MixedPrecisionQuantizationConfig diff --git a/docs/_sources/api/api_docs/modules/network_editor.rst.txt b/docs/_sources/api/api_docs/modules/network_editor.rst.txt new file mode 100644 index 000000000..8df62a390 --- /dev/null +++ b/docs/_sources/api/api_docs/modules/network_editor.rst.txt @@ -0,0 +1,62 @@ +:orphan: + +.. _ug-network_editor: + + +================================= +network_editor Module +================================= + +**The model can be edited by a list of EditRules to apply on nodes in a graph that represents the model during the model quantization. Each EditRule is a tuple of a filter and an action, where we apply the action on each node the filter matches** + +EditRule +========== +.. autoclass:: model_compression_toolkit.core.network_editor.EditRule + +Filters +========== + +.. autoclass:: model_compression_toolkit.core.network_editor.NodeTypeFilter + +| + +.. autoclass:: model_compression_toolkit.core.network_editor.NodeNameFilter + +| + +.. autoclass:: model_compression_toolkit.core.network_editor.NodeNameScopeFilter + + +Actions +========== + +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantConfigAttr + +| + +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr + +| + +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeFinalActivationQuantConfigAttr + +| + +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantConfigAttr + +| + +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeQuantizationParamFunction + +| + +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantizationMethod + +| + +.. autoclass:: model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantizationMethod + +| + +.. 
autoclass:: model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantizationMethod + diff --git a/docs/_sources/api/api_docs/modules/qat_config.rst.txt b/docs/_sources/api/api_docs/modules/qat_config.rst.txt new file mode 100644 index 000000000..9583aee88 --- /dev/null +++ b/docs/_sources/api/api_docs/modules/qat_config.rst.txt @@ -0,0 +1,26 @@ +:orphan: + +.. _ug-qat_config: + + +=========================================================== +qat_config Module +=========================================================== + + +TrainingMethod +================================ +**Select a QAT training method:** + +.. autoclass:: model_compression_toolkit.qat.TrainingMethod + + +| + + +QATConfig +=================================== +**Class to configure the quantization process of the model when quantizing using Quantization-aware Training (QAT):** + +.. autoclass:: model_compression_toolkit.qat.QATConfig + diff --git a/docs/_sources/api/api_docs/modules/quantization_config.rst.txt b/docs/_sources/api/api_docs/modules/quantization_config.rst.txt new file mode 100644 index 000000000..7997f6f1b --- /dev/null +++ b/docs/_sources/api/api_docs/modules/quantization_config.rst.txt @@ -0,0 +1,25 @@ +:orphan: + +.. _ug-quantization_config: + + +================================= +quantization_config Module +================================= + +QuantizationErrorMethod +========================== +**Enum to select a method for quantization parameters' selection:** + +.. autoclass:: model_compression_toolkit.core.QuantizationErrorMethod + + +| + + +QuantizationConfig +========================== +**Class to configure the quantization process of the model:** + +.. autoclass:: model_compression_toolkit.core.QuantizationConfig + diff --git a/docs/_sources/api/api_docs/modules/target_platform.rst.txt b/docs/_sources/api/api_docs/modules/target_platform.rst.txt new file mode 100644 index 000000000..a4e4831f5 --- /dev/null +++ b/docs/_sources/api/api_docs/modules/target_platform.rst.txt @@ -0,0 +1,357 @@ +:orphan: + +.. _ug-target_platform: + + +================================= +target_platform Module +================================= + +MCT can be configured to quantize and optimize models for different hardware settings. +For example, when using qnnpack backend for Pytorch model inference, Pytorch `quantization +configuration `_ +uses `per-tensor weights quantization `_ +for Conv2d, while when using tflite modeling, Tensorflow uses `per-channel weights quantization for +Conv2D `_. + +This can be addressed in MCT by using the target_platform module, that can configure different +parameters that are hardware-related, and the optimization process will use this to optimize the model accordingly. +Models for IMX500, TFLite and qnnpack can be observed `here `_, and can be used using :ref:`get_target_platform_capabilities function`. + +| + +.. include:: ../notes/tpc_note.rst + +| + +The object MCT should get called TargetPlatformCapabilities (or shortly TPC). +This diagram demonstrates the main components: + +.. image:: ../../../../images/tpc.jpg + :scale: 80% + +Now, we will explain about each component with examples. + +The first part is configuring the quantization method for both wights and activations of an operator. +Several methods can be used using QuantizationMethod API: + + +QuantizationMethod +========================== +Select a method to use during quantization: + +.. 
autoclass:: model_compression_toolkit.target_platform.QuantizationMethod
+
+
+|
+
+
+Using a quantization method (or methods, if the weights and activations of an operator are quantized differently),
+a quantization configuration for different operators can be created using OpQuantizationConfig:
+
+
+OpQuantizationConfig
+======================
+.. autoclass:: model_compression_toolkit.target_platform.OpQuantizationConfig
+
+|
+
+If, for example, we would like to quantize an operator's weights with 8 bits (and per-channel), its activations
+with 8 bits, and the quantization thresholds (for both weights and activations) must be power-of-two,
+we can create the OpQuantizationConfig:
+
+.. code-block:: python
+
+    op_qc_8bit = OpQuantizationConfig(
+        activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
+        weights_quantization_method=QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        weights_n_bits=8,
+        weights_per_channel_threshold=True,
+        enable_weights_quantization=True,
+        enable_activation_quantization=True
+    )
+
+|
+
+We will demonstrate later how to attach this OpQuantizationConfig to a specific operator.
+
+If an operator can be quantized in different ways (the simplest example is mixed-precision quantization),
+one can create a QuantizationConfigOptions instance to represent a set of possible quantization
+configuration options for an operator:
+
+
+QuantizationConfigOptions
+============================
+.. autoclass:: model_compression_toolkit.target_platform.QuantizationConfigOptions
+
+If a QuantizationConfigOptions is created with more than
+one OpQuantizationConfig option, a base_config must be passed to the QuantizationConfigOptions
+in order to support the model when MCT optimizes it without mixed precision.
+
+For example, suppose we would like to quantize an operator's weights with either 2, 4 or 8 bits (and, when
+using MCT's non-mixed-precision functions, quantize the operator with 8 bits). For this we can create new
+OpQuantizationConfigs based on the previously created OpQuantizationConfig,
+and gather them under a single QuantizationConfigOptions instance:
+
+.. code-block:: python
+
+    # To quantize a model using mixed-precision, create a QuantizationConfigOptions with more
+    # than one QuantizationConfig.
+    # In this example, we aim to quantize some operations' weights using 2, 4 or 8 bits.
+    op_qc_4bit = op_qc_8bit.clone_and_edit(weights_n_bits=4)
+    op_qc_2bit = op_qc_8bit.clone_and_edit(weights_n_bits=2)
+    mixed_precision_configuration_options = QuantizationConfigOptions([op_qc_8bit,
+                                                                       op_qc_4bit,
+                                                                       op_qc_2bit],
+                                                                      base_config=op_qc_8bit)
+
+|
+
+
+
+The main class for defining the hardware-related properties is called TargetPlatformModel. Using a TargetPlatformModel
+object we can create operator sets, configure how these operator sets will be quantized,
+group operators by common properties, and configure patterns of operators to fuse:
+
+
+TargetPlatformModel
+=======================
+.. autoclass:: model_compression_toolkit.target_platform.TargetPlatformModel
+
+
+A default QuantizationConfigOptions (containing a single OpQuantizationConfig) must be passed
+when instantiating a TargetPlatformModel object. It comes into use when MCT needs to optimize
+an operator that is not defined explicitly in the TargetPlatformModel. In this case, the OpQuantizationConfig
+in the default QuantizationConfigOptions guides MCT on how this operator should be optimized. For example:
+
+.. 
code-block:: python + + # Create a QuantizationConfigOptions with a single OpQuantizationConfig to use as + # a default configuration options. + default_configuration_options = QuantizationConfigOptions([op_qc_8bit]) + + # Create a TargetPlatformModel and set its default quantization config. + # This default configuration will be used for all operations + # unless specified otherwise: + my_model = TargetPlatformModel(default_configuration_options, name='my_model') + +| + +Then, we can start defining the model by creating OperatorsSets: + +OperatorsSet +================ +.. autoclass:: model_compression_toolkit.target_platform.OperatorsSet + +An OperatorsSet gathers group of operators that are labeled by a unique name and can be attached to a +QuantizationConfigOptions (so MCT will use these options to optimize operators from this set). +For example, if FullyConnected can be quantized using 2, 4, or 8 bits, we can create the next +OperatorsSet using the previously created mixed_precision_configuration_options: + +.. code-block:: python + + # Define operators set named "FullyConnected" and attach + # mixed_precision_configuration_options as its QuantizationConfigOptions: + fc_opset = OperatorsSet("FullyConnected", mixed_precision_configuration_options) + +| + +The QuantizationConfigOptions is optional. An OperatorsSet can be also created +without any attached QuantizationConfigOptions. Operators in this kind of OperatorsSets +are attached implicitly to the default QuantizationConfigOptions of the TargetPlatformModel +they are part of: + +.. code-block:: python + + # Define operators set named "Relu" and do not attach + # it any QuantizationConfigOptions: + relu_opset = OperatorsSet("Relu") + +| + +Another component of a TargetPlatformModel is Fusing. Fusing defines a list +of operators that should be combined and treated as a single operator, hence no +quantization is applied between them when they appear in a model: + + +Fusing +============== +.. autoclass:: model_compression_toolkit.target_platform.Fusing + +For example, to fuse the previously created two OperatorsSets fc_opset and +relu_opset we can create the next Fusing: + +.. code-block:: python + + # Combine multiple operators into a single operator to avoid quantization between + # them. To do this we define fusing patterns using the OperatorsSets that were created. + Fusing([fc_opset, relu_opset]) + +| + +Notice that the list of opsets must contain at least two OperatorSets. +Also notice that sublist of the OperatorsSet list that is passed to the Fusing, +will not be fused, unless another Fusing is created for that. For example, +if a model is defined to fuse three sequenced operators [FullyConnected, Relu, Add]: + +.. code-block:: python + + # In addition to the OperatorsSets we created, create new OperatorsSets for "add" ops: + add_opset = OperatorsSet("Add") + + # Fuse sequences of operators: + Fusing([fc_opset, relu_opset, add_opset]) + +| + +and the pre-trained model that MCT optimizes has a sequence of [fc_opset, relu_opset] +where the next operator is not an add_opset, the two operators [fc_opset, relu_opset] +will not be fused as the only defined fusing pattern is of the three OperatorsSets +[fc_opset, relu_opset, add_opset]. In order to fuse sequences of [fc_opset, relu_opset] +as well, a new Fusing should be defined: + +.. 
code-block:: python
+
+    # Fuse sequences of the three listed operators:
+    Fusing([fc_opset, relu_opset, add_opset])
+
+    # In addition, fuse sequences of the two listed operators:
+    Fusing([fc_opset, relu_opset])
+
+Now, if MCT encounters a sequence of [fc_opset, relu_opset], they will be fused regardless of the following operator.
+Sequences of [fc_opset, relu_opset, add_opset] will be fused as well, and
+the new Fusing of [fc_opset, relu_opset] will not affect them (but will affect patterns
+of [fc_opset, relu_opset], of course).
+
+When multiple operators should be fused in a similar way, an OperatorSetConcat can be used:
+
+OperatorSetConcat
+====================
+.. autoclass:: model_compression_toolkit.target_platform.OperatorSetConcat
+
+
+OperatorSetConcat gathers multiple OperatorsSet objects and can be specified in a fusing operators list.
+If, for example, we want to fuse the patterns [fc_opset, add_opset] and [fc_opset, relu_opset],
+we can either create two separate Fusing objects as demonstrated above, or an OperatorSetConcat
+can be used as follows:
+
+.. code-block:: python
+
+    # Concatenate two OperatorsSet objects to be treated similarly when fused:
+    activations_after_fc_to_fuse = OperatorSetConcat(relu_opset, add_opset)
+
+    # Create a fusing pattern using OperatorSetConcat. This is equivalent to defining two
+    # separate fusing patterns of: [fc_opset, relu_opset], [fc_opset, add_opset]
+    Fusing([fc_opset, activations_after_fc_to_fuse])
+
+|
+
+
+TargetPlatformModel Code Example
+===================================
+
+.. literalinclude:: ../../../../../model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py
+    :language: python
+    :lines: 15-156
+
+|
+
+After modeling the hardware that MCT should optimize for, this hardware model needs to be
+attached to specific framework information in order to associate the operators defined in the
+hardware model with layers in the framework's different representations.
+For example, if we created an OperatorsSet for the "Add" operator, in Tensorflow this operator
+can be used by two different layers: keras.layers.Add and tf.add.
+To attach a list of a framework's layers to an OperatorsSet that is defined in the TargetPlatformModel,
+an OperationsSetToLayers can be used:
+
+OperationsSetToLayers
+=========================
+.. autoclass:: model_compression_toolkit.target_platform.OperationsSetToLayers
+
+Using OperationsSetToLayers we can associate an OperatorsSet label with a list of the framework's layers:
+
+.. code-block:: python
+
+    import tensorflow as tf
+    from keras.layers import Add
+    OperationsSetToLayers("Add", [tf.add, Add])
+
+|
+
+This way, when MCT quantizes one of the layers tf.add or keras.layers.Add, it uses the QuantizationConfigOptions
+that is associated with the OperatorsSet that was labeled "Add" to optimize the layer.
+
+There are cases where an operator can be represented using a layer, but only when the layer has a specific configuration.
+
+For example, if the optimization should differ for bounded ReLU and unbounded ReLU, two OperatorsSets
+can be created, and the layers that will be attached to each OperatorsSet have to be filtered.
+For that, LayerFilterParams can be used:
+
+LayerFilterParams
+=========================
+.. autoclass:: model_compression_toolkit.target_platform.LayerFilterParams
+
+
+LayerFilterParams wraps a layer type together with conditions and key-value pairs,
+and can check whether a given layer matches the wrapped layer type, conditions and key-value pairs.
+If, for example, a distinction needs to be made between bounded-ReLU and unbounded-ReLU in Tensorflow,
+the following LayerFilterParams can be created:
+
+.. code-block:: python
+
+    from keras.layers import ReLU
+
+    # Create a LayerFilterParams that matches ReLU layers whose 'max_value'
+    # attribute is None
+    unbounded_relu_filter = LayerFilterParams(ReLU, max_value=None)
+
+    # Create a LayerFilterParams that matches ReLU layers whose 'max_value'
+    # attribute is not None
+    bounded_relu_filter = LayerFilterParams(ReLU, NotEq('max_value', None))
+
+|
+
+In this example, we used NotEq, which filters layers whose attribute has
+a value different from the value that was passed (in this case - None). More filters can be created
+and passed to the LayerFilterParams in order to create a more detailed filter.
+More filters and usage examples are detailed :ref:`here`.
+
+These LayerFilterParams instances can now be attached to OperatorsSets in the TargetPlatformModel
+using OperationsSetToLayers just like any other layer:
+
+.. code-block:: python
+
+    import tensorflow as tf
+    from keras.layers import ReLU, Activation
+
+    OperationsSetToLayers("ReLU", [tf.nn.relu,
+                                   tf.nn.relu6,
+                                   LayerFilterParams(ReLU, negative_slope=0.0),
+                                   LayerFilterParams(Activation, activation="relu")])
+
+|
+
+The mappings from OperatorsSets to lists of layers are part of a class called TargetPlatformCapabilities,
+which attaches the layer representations to OperatorsSets in a TargetPlatformModel instance:
+
+TargetPlatformCapabilities
+=============================
+.. autoclass:: model_compression_toolkit.target_platform.TargetPlatformCapabilities
+
+
+To create a TargetPlatformCapabilities, a TargetPlatformModel instance should be passed at
+TargetPlatformCapabilities initialization. Then, OperationsSetToLayers can be created and attached
+to the TargetPlatformCapabilities as in the following example:
+
+
+TargetPlatformCapabilities Code Example
+===========================================
+
+.. literalinclude:: ../../../../../model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py
+    :language: python
+    :lines: 15-86
+
+
+
+
diff --git a/docs/_sources/api/api_docs/modules/trainable_infrastructure.rst.txt b/docs/_sources/api/api_docs/modules/trainable_infrastructure.rst.txt
new file mode 100644
index 000000000..c514a3307
--- /dev/null
+++ b/docs/_sources/api/api_docs/modules/trainable_infrastructure.rst.txt
@@ -0,0 +1,83 @@
+:orphan:
+
+.. _ug-trainable_infrastructure:
+
+
+=================================
+trainable_infrastructure Module
+=================================
+
+The trainable infrastructure is a module containing quantization abstraction and quantizers for hardware-oriented model optimization tools.
+It provides the required abstraction for trainable quantization methods such as quantization-aware training.
+It utilizes the Inferable Quantizers Infrastructure provided by the `MCT Quantizers `_ package, which offers the required abstraction for emulating inference-time quantization.
+
+When using a trainable quantizer, each layer with quantized weights is wrapped with a "Quantization Wrapper" object,
+and each activation quantizer is stored in an "Activation Quantization Holder" object.
+Both components are provided by the MCT Quantizers package.
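For orientation, here is a minimal sketch (not part of the original patch) of how these wrapped components can be inspected on a quantized Keras model. It assumes a `quantized_exportable_model` produced as in the PTQ examples earlier in this patch, and that the `mct_quantizers` package exposes the `KerasQuantizationWrapper` and `KerasActivationQuantizationHolder` classes:

.. code-block:: python

    from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantizationHolder

    # List which layers of the quantized model wrap quantized weights
    # and which hold an activation quantizer.
    for layer in quantized_exportable_model.layers:
        if isinstance(layer, KerasQuantizationWrapper):
            print(f'{layer.name}: wraps a layer with quantized weights')
        elif isinstance(layer, KerasActivationQuantizationHolder):
            print(f'{layer.name}: holds an activation quantizer')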
+
+The quantizers in this module are built upon the "Inferable Quantizer" abstraction (from MCT Quantizers),
+and define the "Trainable Quantizer" framework,
+which contains learnable quantization parameters that can be optimized during training.
+
+Now, we will explain how a trainable quantizer is built and used.
+We start by explaining the basic building block of a trainable quantizer, and then explain how to initialize it using a configuration object.
+
+BaseKerasTrainableQuantizer
+==============================
+This class is a base class for trainable Keras quantizers. It validates the provided quantization config and defines an abstract function that any quantizer needs to implement.
+It adds get_config and from_config functions to the base quantizer to enable loading and saving the Keras model.
+
+.. autoclass:: model_compression_toolkit.trainable_infrastructure.BaseKerasTrainableQuantizer
+
+BasePytorchTrainableQuantizer
+==============================
+This class is a base class for trainable Pytorch quantizers. It validates the provided quantization config and defines an abstract function that any quantizer needs to implement.
+It adds get_config and from_config functions to the base quantizer to enable loading and saving the model.
+
+.. autoclass:: model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer
+
+TrainableQuantizerWeightsConfig
+=================================
+This configuration object contains the necessary attributes for configuring a weights trainable quantizer.
+
+.. autoclass:: model_compression_toolkit.trainable_infrastructure.TrainableQuantizerWeightsConfig
+
+For example, we can set a trainable weights quantizer with the following configuration:
+
+.. code-block:: python
+
+    from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
+    from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
+
+    TrainableQuantizerWeightsConfig(weights_quantization_method=QuantizationMethod.SYMMETRIC,
+                                    weights_n_bits=8,
+                                    weights_quantization_params={THRESHOLD: 2.0},
+                                    enable_weights_quantization=True,
+                                    weights_channels_axis=3,
+                                    weights_per_channel_threshold=True,
+                                    min_threshold=MIN_THRESHOLD)
+
+
+|
+
+TrainableQuantizerActivationConfig
+====================================
+This configuration object contains the necessary attributes for configuring an activation trainable quantizer.
+
+.. autoclass:: model_compression_toolkit.trainable_infrastructure.TrainableQuantizerActivationConfig
+
+For example, we can set a trainable activation quantizer with the following configuration:
+
+.. code-block:: python
+
+    from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
+    from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD
+
+    TrainableQuantizerActivationConfig(activation_quantization_method=QuantizationMethod.UNIFORM,
+                                       activation_n_bits=8,
+                                       activation_quantization_params={THRESHOLD: 2.0},
+                                       enable_activation_quantization=True,
+                                       min_threshold=MIN_THRESHOLD)
+
+
+|
diff --git a/docs/_sources/api/api_docs/notes/tpc_note.rst.txt b/docs/_sources/api/api_docs/notes/tpc_note.rst.txt
new file mode 100644
index 000000000..c7352b8cb
--- /dev/null
+++ b/docs/_sources/api/api_docs/notes/tpc_note.rst.txt
@@ -0,0 +1,8 @@
+
+.. 
note:: + For now, some fields of :class:`~model_compression_toolkit.target_platform.OpQuantizationConfig` are ignored during + the optimization process (currently, the quantizer type, number of bits, and quantization enable/disable information + are in use). + + - MCT will use more information from :class:`~model_compression_toolkit.target_platform.OpQuantizationConfig`, in the future. + diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt index bd91fe81f..2c4713162 100644 --- a/docs/_sources/index.rst.txt +++ b/docs/_sources/index.rst.txt @@ -41,7 +41,7 @@ A nightly version is also available (unstable): For using with Tensorflow please install the packages: `tensorflow `_ -`tensorflow-model-optimization `_ + For using with Pytorch please install the package: `torch `_ @@ -52,21 +52,21 @@ Supported Features Keras: -* :ref:`Post Training Quantization` [1] -* :ref:`Gradient based post training using knowledge distillation` -* :ref:`Mixed-precision post training quantization` -* :ref:`Init model for Quantization Aware Training` (Experimental) -* :ref:`Finalize model after Quantization Aware Training` (Experimental) -* :ref:`Structured Pruning` (Experimental) +* :ref:`Post Training Quantization` [1] +* :ref:`Gradient based post training using knowledge distillation` +* :ref:`Init model for Quantization Aware Training` (Experimental) +* :ref:`Finalize model after Quantization Aware Training` (Experimental) +* :ref:`Structured pruning` (Experimental) +* :ref:`Data generation` (Experimental) Pytorch: -* :ref:`Post Training Quantization` [1] -* :ref:`Gradient based post training using knowledge distillation` -* :ref:`Mixed-precision post training quantization` -* :ref:`Init model for Quantization Aware Training` (Experimental) -* :ref:`Finalize model after Quantization Aware Training` (Experimental) - +* :ref:`Post Training Quantization` [1] +* :ref:`Gradient based post training using knowledge distillation` +* :ref:`Init model for Quantization Aware Training` (Experimental) +* :ref:`Finalize model after Quantization Aware Training` (Experimental) +* :ref:`Structured pruning` (Experimental) +* :ref:`Data generation` (Experimental) Visualization: @@ -101,7 +101,7 @@ Please visit the MCT API documentation here :titlesonly: :maxdepth: 1 - API Documentation<../api/experimental_api_docs/index> + API Documentation<../api/api_docs/index> Technical Constraints ========================= diff --git a/docs/api/api_docs/classes/DefaultDict.html b/docs/api/api_docs/classes/DefaultDict.html index d99a59dbb..602dea4e2 100644 --- a/docs/api/api_docs/classes/DefaultDict.html +++ b/docs/api/api_docs/classes/DefaultDict.html @@ -2,12 +2,12 @@ - + - + - DefaultDict Class — MCT Documentation: ver 1.4.0 + DefaultDict Class — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

Navigation

  • index
  • - + @@ -41,18 +42,18 @@

    Navigation

    -

    DefaultDict Class

    +

    DefaultDict Class

    -class model_compression_toolkit.DefaultDict(known_dict, default_factory=None)
    +class model_compression_toolkit.DefaultDict(known_dict=None, default_value=None)

    Default dictionary. It wraps a dictionary given at initialization and return its values when requested. If the requested key is not presented at initial dictionary, -it returns the returned value a default factory (that is passed at initialization) generates.

+it returns the default value that was passed at initialization.

    -
    Parameters
    +
    Parameters:
      -
    • known_dict (Dict[Any, Any]) – Dictionary to wrap.

    • -
    • default_factory (Callable) – Callable to get default values when requested key is not in known_dict.

    • +
    • known_dict – Dictionary to wrap. If None is provided, initializes an empty dictionary.

    • +
    • default_value – default value when requested key is not in known_dict.

    @@ -62,19 +63,26 @@

    Navigation

    Get the value of the inner dictionary by the given key, If key is not in dictionary, it uses the default_factory to return a default value.

    -
    Parameters
    -

    key (Any) – Key to use in inner dictionary.

    +
    Parameters:
    +

    key – Key to use in inner dictionary.

    -
    Returns
    +
    Returns:

    Value of the inner dictionary by the given key, or a default value if not exist. If default_factory was not passed at initialization, it returns None.

    -
    Return type
    +
    Return type:

    Any

    +
    +
    +keys()
    +

    Get keys of known_dict +Returns: keys of known_dict
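For reference, a small usage sketch based only on the signature and methods documented above (known_dict, default_value, get() and keys()):

.. code-block:: python

    from model_compression_toolkit import DefaultDict

    # Wrap a known mapping; unknown keys fall back to the default value.
    kernel_axis = DefaultDict({'Conv2D': 3, 'Dense': 1}, default_value=-1)

    kernel_axis.get('Conv2D')   # 3
    kernel_axis.get('Unknown')  # -1 (the default value)
    kernel_axis.keys()          # keys of the wrapped dictionary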

    +
    +
    @@ -95,7 +103,7 @@

    Quick search

    - +
    @@ -106,13 +114,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/classes/FolderImageLoader.html b/docs/api/api_docs/classes/FolderImageLoader.html index 50ff1aac2..4da36875d 100644 --- a/docs/api/api_docs/classes/FolderImageLoader.html +++ b/docs/api/api_docs/classes/FolderImageLoader.html @@ -2,12 +2,12 @@ - + - + - Folder Image Loader API — MCT Documentation: ver 1.4.0 + Folder Image Loader API — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,21 +42,21 @@

    Navigation

    -

    Folder Image Loader API

    +

    Folder Image Loader API

    The following API can be used to load a folder of images to create a representative dataset for PTQ calibration

    -
    -class model_compression_toolkit.FolderImageLoader(folder, preprocessing, batch_size, file_types=FILETYPES)
    +
    +class model_compression_toolkit.core.FolderImageLoader(folder, preprocessing, batch_size, file_types=FILETYPES)

    Class for images loading, processing and retrieving.

    Initialize a FolderImageLoader object.

    -
    Parameters
    +
    Parameters:
      -
    • folder (str) – Path of folder with images to load. The path has to exist, and has to contain at

    • +
    • folder – Path of folder with images to load. The path has to exist, and has to contain at

    • image. (least one) –

    • -
    • preprocessing (List[Callable]) – List of functions to use when processing the images before retrieving them.

    • -
    • batch_size (int) – Number of images to retrieve each sample.

    • -
    • file_types (List[str]) – Files types to scan in the folder. Default list is FILETYPES

    • +
    • preprocessing – List of functions to use when processing the images before retrieving them.

    • +
    • batch_size – Number of images to retrieve each sample.

    • +
    • file_types – Files types to scan in the folder. Default list is FILETYPES

    @@ -74,8 +75,8 @@

    Navigation

    -
    -sample()
    +
    +sample()

    Returns: A sample of batch_size images from the folder the FolderImageLoader scanned.

    @@ -83,10 +84,10 @@

    Navigation

    -

    Default file types to scan

    +

    Default file types to scan

    -
    -model_compression_toolkit.common.data_loader.FILETYPES = ['jpeg', 'jpg', 'bmp', 'png']
    +
    +model_compression_toolkit.core.common.data_loader.FILETYPES = ['jpeg', 'jpg', 'bmp', 'png']
    @@ -115,7 +116,7 @@

    Quick search

    - +
    @@ -126,13 +127,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/classes/FrameworkInfo.html b/docs/api/api_docs/classes/FrameworkInfo.html index fc5e39ec1..1f4524d44 100644 --- a/docs/api/api_docs/classes/FrameworkInfo.html +++ b/docs/api/api_docs/classes/FrameworkInfo.html @@ -2,12 +2,12 @@ - + - + - FrameworkInfo Class — MCT Documentation: ver 1.4.0 + FrameworkInfo Class — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,11 +42,11 @@

    Navigation

    -

    FrameworkInfo Class

    +

    FrameworkInfo Class

    The following API can be used to pass MCT framework-related information to use when optimizing the network

    -
    -class model_compression_toolkit.FrameworkInfo(activation_quantizer_mapping, weights_quantizer_mapping, kernel_channels_mapping, activation_min_max_mapping, layer_min_max_mapping, kernel_ops_attributes_mapping, output_channel_index)
    +
    +class model_compression_toolkit.core.FrameworkInfo(activation_quantizer_mapping, kernel_channels_mapping, activation_min_max_mapping, layer_min_max_mapping, kernel_ops_attributes_mapping, out_channel_axis_mapping)

    A class to wrap all information about a specific framework the library needs to quantize a model. Specifically, FrameworkInfo holds lists of layers by how they should be quantized, and multiple mappings such as layer to it kernel channels indices, and a layer to its min/max values, etc. @@ -54,15 +55,14 @@

    Navigation

    activation_ops: Layers that their outputs should get quantized (e.g., Add, ReLU, etc.) no_quantization_ops:Layers that should not get quantized (e.g., Reshape, Transpose, etc.)

    -
    Parameters
    +
    Parameters:
    • activation_quantizer_mapping (Dict[QuantizationMethod, Callable]) – A dictionary mapping from QuantizationMethod to a quantization function.

    • -
    • weights_quantizer_mapping (Dict[QuantizationMethod, Callable]) – A dictionary mapping from QuantizationMethod to a quantization function.

    • kernel_channels_mapping (DefaultDict) – Dictionary from a layer to a tuple of its kernel in/out channels indices.

    • activation_min_max_mapping (Dict[str, tuple]) – Dictionary from an activation function to its min/max output values.

    • layer_min_max_mapping (Dict[Any, tuple]) – Dictionary from a layer to its min/max output values.

    • kernel_ops_attributes_mapping (DefaultDict) – Dictionary from a framework operator to a list of its weights attirbutes to quantize.

    • -
    • output_channel_index (ChannelAxis) – Index of output channels of the model’s layers (for computing statistics per-channel).

    • +
    • out_channel_axis_mapping (DefaultDict) – Dictionary of output channels of the model’s layers (for computing statistics per-channel).

    @@ -94,11 +94,11 @@

    Navigation


    -

    ChannelAxis

    +

    ChannelAxis

    Enum to select the output channels format in the model:

    -
    -class model_compression_toolkit.ChannelAxis(value)
    +
    +class model_compression_toolkit.core.ChannelAxis(value)

    Index of output channels axis:

    NHWC - Output channels index is last.

    NCHW - Output channels index is 1.

    @@ -133,7 +133,7 @@

    Quick search

    - +
    @@ -144,13 +144,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/classes/GradientPTQConfig.html b/docs/api/api_docs/classes/GradientPTQConfig.html index 37909e9f3..8c13eb544 100644 --- a/docs/api/api_docs/classes/GradientPTQConfig.html +++ b/docs/api/api_docs/classes/GradientPTQConfig.html @@ -2,12 +2,12 @@ - + - + - GradientPTQConfig Class — MCT Documentation: ver 1.4.0 + GradientPTQConfig Class — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,24 +42,52 @@

    Navigation

    -

    GradientPTQConfig Class

    +

    GradientPTQConfig Class

    The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float Keras model) to a student (the quantized Keras model)

    -
    -class model_compression_toolkit.GradientPTQConfig(n_iter, optimizer, loss=None, log_function=None, train_bias=True, lsb_change_per_bit_width=DefaultDict(MAX_LSBS_CHANGE_MAP, lambda : ...))
    -

    Configuration to use for quantization with GradientPTQ (experimental).

    +
    +class model_compression_toolkit.gptq.GradientPTQConfig(n_epochs, optimizer, optimizer_rest=None, loss=None, log_function=None, train_bias=True, rounding_type=RoundingType.SoftQuantizer, use_hessian_based_weights=True, optimizer_quantization_parameter=None, optimizer_bias=None, regularization_factor=REG_DEFAULT, hessian_weights_config=GPTQHessianScoresConfig(), gptq_quantizer_params_override=None)
    +

    Configuration to use for quantization with GradientPTQ.

    Initialize a GradientPTQConfig.

    -
    Parameters
    +
    Parameters:
      -
    • n_iter (int) – Number of iterations to train.

    • +
    • n_epochs (int) – Number of representative dataset epochs to train.

    • optimizer (Any) – Optimizer to use.

    • +
    • optimizer_rest (Any) – Optimizer to use for bias and quantizer parameters.

    • loss (Callable) – The loss to use. should accept 6 lists of tensors. 1st list of quantized tensors, the 2nd list is the float tensors, the 3rd is a list of quantized weights, the 4th is a list of float weights, the 5th and 6th lists are the mean and std of the tensors accordingly. see example in multiple_tensors_mse_loss

    • log_function (Callable) – Function to log information about the GPTQ process.

    • train_bias (bool) – Whether to update the bias during the training or not.

    • -
    • lsb_change_per_bit_width (dict) – Whether to update the bias during the training or not.

    • +
    • rounding_type (RoundingType) – An enum that defines the rounding type.

    • +
    • use_hessian_based_weights (bool) – Whether to use Hessian-based weights for weighted average loss.

    • +
    • optimizer_quantization_parameter (Any) – Optimizer to override the rest optimizer for quantizer parameters.

    • +
    • optimizer_bias (Any) – Optimizer to override the rest optimizer for bias.

    • +
    • regularization_factor (float) – A floating point number that defines the regularization factor.

    • +
    • hessian_weights_config (GPTQHessianScoresConfig) – A configuration that include all necessary arguments to run a computation of Hessian scores for the GPTQ loss.

    • +
    • gptq_quantizer_params_override (dict) – A dictionary of parameters to override in GPTQ quantizer instantiation. Defaults to None (no parameters).

    • +
    +
    +
    +
    + +
    +
    +

    GPTQHessianScoresConfig Class

    +

    The following API can be used to create a GPTQHessianScoresConfig instance which can be used to define necessary parameters for computing Hessian scores for the GPTQ loss function.

    +
    +
    +class model_compression_toolkit.gptq.GPTQHessianScoresConfig(hessians_num_samples=16, norm_scores=True, log_norm=True, scale_log_norm=False)
    +

    Configuration to use for computing the Hessian-based scores for GPTQ loss metric.

    +

    Initialize a GPTQHessianWeightsConfig.

    +
    +
    Parameters:
    +
      +
    • hessians_num_samples (int) – Number of samples to use for computing the Hessian-based scores.

    • +
    • norm_scores (bool) – Whether to normalize the returned scores of the weighted loss function (to get values between 0 and 1).

    • +
    • log_norm (bool) – Whether to use log normalization for the GPTQ Hessian-based scores.

    • +
    • scale_log_norm (bool) – Whether to scale the final vector of the Hessian-based scores.

    @@ -73,6 +102,14 @@

    Navigation

    @@ -93,13 +130,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/classes/PruningConfig.html b/docs/api/api_docs/classes/PruningConfig.html new file mode 100644 index 000000000..ead79c479 --- /dev/null +++ b/docs/api/api_docs/classes/PruningConfig.html @@ -0,0 +1,124 @@ + + + + + + + + + + Pruning Configuration — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Pruning Configuration

    +
    +
    +model_compression_toolkit.pruning.PruningConfig(num_score_approximations=32, importance_metric=ImportanceMetric.LFH, channels_filtering_strategy=ChannelsFilteringStrategy.GREEDY)
    +

    Configuration class for specifying how a neural network should be pruned.

    +
    +
    +model_compression_toolkit.pruning.num_score_approximations
    +

    The number of score approximations to perform +when calculating channel importance.

    +
    +
    Type:
    +

    int

    +
    +
    +
    + +
    +
    +model_compression_toolkit.pruning.importance_metric
    +

    The metric used to calculate channel importance.

    +
    +
    Type:
    +

    ImportanceMetric

    +
    +
    +
    + +
    +
    +model_compression_toolkit.pruning.channels_filtering_strategy
    +

    The strategy used to filter out channels.

    +
    +
    Type:
    +

    ChannelsFilteringStrategy
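A hedged usage sketch of this configuration follows; the keras_pruning_experimental call and its target_kpi / representative_data_gen keywords are assumptions based on the pruning pages added earlier in this patch:

.. code-block:: python

    import numpy as np
    import model_compression_toolkit as mct
    from keras.applications import ResNet50

    float_model = ResNet50()

    # Keep roughly half of the original float32 weights memory (in bytes).
    target_kpi = mct.core.KPI(weights_memory=float_model.count_params() * 4 * 0.5)

    pruning_config = mct.pruning.PruningConfig(num_score_approximations=32)

    pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(
        model=float_model,
        target_kpi=target_kpi,
        representative_data_gen=lambda: [np.random.random((1, 224, 224, 3))],
        pruning_config=pruning_config)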

    +
    +
    +
    + +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/classes/PruningInfo.html b/docs/api/api_docs/classes/PruningInfo.html new file mode 100644 index 000000000..8564f1c77 --- /dev/null +++ b/docs/api/api_docs/classes/PruningInfo.html @@ -0,0 +1,117 @@ + + + + + + + + + + Pruning Information — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Pruning Information

    +
    +
    +model_compression_toolkit.pruning.PruningInfo(pruning_masks, importance_scores)
    +

    PruningInfo stores information about a pruned model, including the pruning masks +and importance scores for each layer. This class acts as a container for accessing +pruning-related metadata.

    +
    +
    +model_compression_toolkit.pruning.pruning_masks
    +

    Stores the pruning masks for each layer. +A pruning mask is an array where each element indicates whether the corresponding +channel or neuron has been pruned (0) or kept (1).

    +
    +
    Type:
    +

    Dict[BaseNode, np.ndarray]

    +
    +
    +
    + +
    +
    +model_compression_toolkit.pruning.importance_scores
    +

    Stores the importance scores for each layer. +Importance scores quantify the significance of each channel in the layer.

    +
    +
    Type:
    +

    Dict[BaseNode, np.ndarray]

    +
    +
    +
    + +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/index.html b/docs/api/api_docs/index.html index 5fda3e16d..93bc332e2 100644 --- a/docs/api/api_docs/index.html +++ b/docs/api/api_docs/index.html @@ -20,6 +20,7 @@ + + + + +
    +
    +
    +
    + +
    +

    Get DataGenerationConfig for Keras Models

    +
    +
    +model_compression_toolkit.data_generation.get_keras_data_generation_config(n_iter=DEFAULT_N_ITER, optimizer=Adam, data_gen_batch_size=DEFAULT_DATA_GEN_BS, initial_lr=DEFAULT_KERAS_INITIAL_LR, output_loss_multiplier=DEFAULT_KERAS_OUTPUT_LOSS_MULTIPLIER, scheduler_type=SchedulerType.REDUCE_ON_PLATEAU, bn_alignment_loss_type=BatchNormAlignemntLossType.L2_SQUARE, output_loss_type=OutputLossType.REGULARIZED_MIN_MAX_DIFF, data_init_type=DataInitType.Gaussian, layer_weighting_type=BNLayerWeightingType.AVERAGE, image_granularity=ImageGranularity.BatchWise, image_pipeline_type=ImagePipelineType.RANDOM_CROP_FLIP, image_normalization_type=ImageNormalizationType.KERAS_APPLICATIONS, extra_pixels=0, bn_layer_types=[BatchNormalization], clip_images=True, reflection=True)
    +

    Function to create a DataGenerationConfig object with the specified configuration parameters.

    +
    +
    Parameters:
    +
      +
    • n_iter (int) – Number of iterations for the data generation process.

    • +
    • optimizer (Optimizer) – The optimizer to use for the data generation process.

    • +
    • data_gen_batch_size (int) – Batch size for data generation.

    • +
    • initial_lr (float) – Initial learning rate for the optimizer.

    • +
    • output_loss_multiplier (float) – Multiplier for the output loss during optimization.

    • +
    • scheduler_type (SchedulerType) – The type of scheduler to use.

    • +
    • bn_alignment_loss_type (BatchNormAlignemntLossType) – The type of BatchNorm alignment loss to use.

    • +
    • output_loss_type (OutputLossType) – The type of output loss to use.

    • +
    • data_init_type (DataInitType) – The type of data initialization to use.

    • +
    • layer_weighting_type (BNLayerWeightingType) – The type of layer weighting to use.

    • +
    • image_granularity (ImageGranularity) – The granularity of the images for optimization.

    • +
    • image_pipeline_type (ImagePipelineType) – The type of image pipeline to use.

    • +
    • image_normalization_type (ImageNormalizationType) – The type of image normalization to use.

    • +
    • extra_pixels (int) – Extra pixels to add to the input image size. Defaults to 0.

    • +
    • bn_layer_types (List) – List of BatchNorm layer types to be considered for data generation.

    • +
    • clip_images (bool) – Whether to clip images during optimization.

    • +
    • reflection (bool) – Whether to use reflection during optimization.

    • +
    +
    +
    Returns:
    +

    Data generation configuration object.

    +
    +
    Return type:
    +

    DataGenerationConfig

    +
    +

    +
    +
    +
    + +
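Examples

For instance, a minimal usage sketch (the argument values below are illustrative assumptions; any parameter that is not passed keeps its default):

>>> import model_compression_toolkit as mct
>>> config = mct.data_generation.get_keras_data_generation_config(n_iter=500, data_gen_batch_size=32)

The resulting configuration object can then be passed to keras_data_generation_experimental.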
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/get_keras_gptq_config.html b/docs/api/api_docs/methods/get_keras_gptq_config.html index d40ae3122..8993aac0d 100644 --- a/docs/api/api_docs/methods/get_keras_gptq_config.html +++ b/docs/api/api_docs/methods/get_keras_gptq_config.html @@ -2,12 +2,12 @@ - + - + - Get GradientPTQConfig for Keras Models — MCT Documentation: ver 1.4.0 + Get GradientPTQConfig for Keras Models — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,39 +42,47 @@

    Navigation

    -

    Get GradientPTQConfig for Keras Models

    +

    Get GradientPTQConfig for Keras Models

    -
    -model_compression_toolkit.get_keras_gptq_config(n_iter, optimizer=tf.keras.optimizers.Adam(), loss=multiple_tensors_mse_loss, log_function=None, train_bias=True)
    -

    Create a GradientPTQConfig instance for Keras models.

    +
    +model_compression_toolkit.gptq.get_keras_gptq_config(n_epochs, optimizer=tf.keras.optimizers.Adam(learning_rate=LR_DEFAULT), optimizer_rest=tf.keras.optimizers.Adam(learning_rate=LR_REST_DEFAULT), loss=GPTQMultipleTensorsLoss(), log_function=None, use_hessian_based_weights=True, regularization_factor=REG_DEFAULT)
    +

    Create a GradientPTQConfigV2 instance for Keras models.

    -
    Parameters
    +
    Parameters:
      -
    • n_iter (int) – Number of iterations to fine-tune.

    • -
    • optimizer (OptimizerV2) – Keras optimizer to use for fine-tuning.

    • +
    • n_epochs (int) – Number of epochs for running the representative dataset for fine-tuning.

    • +
• optimizer (OptimizerV2) – Keras optimizer to use for fine-tuning of the auxiliary variable, with a default learning rate set to 0.2.

    • +
    • optimizer_rest (OptimizerV2) – Keras optimizer to use for fine-tuning of the bias variable.

• loss (Callable) – Loss to use during fine-tuning. It should accept 4 lists of tensors: the 1st is the quantized tensors, the 2nd is the float tensors, the 3rd is the quantized weights and the 4th is the float weights (see the sketch at the end of the examples below).

    • log_function (Callable) – Function to log information about the gptq process.

    • -
    • train_bias (bool) – Whether to update the bias during the the fine-tuning or not.

    • +
    • use_hessian_based_weights (bool) – Whether to use Hessian-based weights for weighted average loss.

    • +
    • regularization_factor (float) – A floating point number that defines the regularization factor.

    -
    Returns
    -

    a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.

    +
    Returns:
    +

    a GradientPTQConfigV2 object to use when fine-tuning the quantized model using gptq.

    Examples

    -

    Create a GradientPTQConfig to run for 5 iteration:

    -
    >>> gptq_conf = get_keras_gptq_config(n_iter=5)
    +

    Import MCT and TensorFlow:

    +
    >>> import model_compression_toolkit as mct
    +>>> import tensorflow as tf
     
    -

    To disable the biases training, one may set train_bias to false (enabled by default):

    -
    >>> gptq_conf = get_keras_gptq_config(n_iter=5, train_bias=false)
    +

    Create a GradientPTQConfigV2 to run for 5 epochs:

    +
    >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=5)
     

    Other Tensorflow optimizers can be passed:

    -
    >>> gptq_conf = get_keras_gptq_config(n_iter=3, optimizer=tf.keras.optimizers.Nadam())
    +
    >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=3, optimizer=tf.keras.optimizers.Nadam())
     
    -

    The configuration can be passed to keras_post_training_quantization() in order to quantize a keras model using gptq.

    +

    The configuration can be passed to keras_post_training_quantization() in order to quantize a keras model using gptq.
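A custom loss callable can also be supplied. The following is only a sketch, assuming the 4-lists signature described in the loss parameter above (average MSE over the compared output tensors):

>>> def custom_gptq_loss(quant_outputs, float_outputs, quant_weights, float_weights):
>>>     # Hypothetical example: mean squared error averaged over the compared output tensors.
>>>     per_tensor_mse = [tf.reduce_mean(tf.square(q - f)) for q, f in zip(quant_outputs, float_outputs)]
>>>     return tf.add_n(per_tensor_mse) / len(per_tensor_mse)
>>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=5, loss=custom_gptq_loss)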

    +
    +
    Return type:
    +

    GradientPTQConfig

    +
    +
    @@ -94,7 +103,7 @@

    Quick search

    - +
    @@ -105,13 +114,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/methods/get_pytorch_data_generation_config.html b/docs/api/api_docs/methods/get_pytorch_data_generation_config.html new file mode 100644 index 000000000..2852b8509 --- /dev/null +++ b/docs/api/api_docs/methods/get_pytorch_data_generation_config.html @@ -0,0 +1,123 @@ + + + + + + + + + + Get DataGenerationConfig for Pytorch Models — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Get DataGenerationConfig for Pytorch Models

    +
    +
    +model_compression_toolkit.data_generation.get_pytorch_data_generation_config(n_iter=DEFAULT_N_ITER, optimizer=RAdam, data_gen_batch_size=DEFAULT_DATA_GEN_BS, initial_lr=DEFAULT_PYTORCH_INITIAL_LR, output_loss_multiplier=DEFAULT_PYTORCH_OUTPUT_LOSS_MULTIPLIER, scheduler_type=SchedulerType.REDUCE_ON_PLATEAU, bn_alignment_loss_type=BatchNormAlignemntLossType.L2_SQUARE, output_loss_type=OutputLossType.REGULARIZED_MIN_MAX_DIFF, data_init_type=DataInitType.Diverse, layer_weighting_type=BNLayerWeightingType.AVERAGE, image_granularity=ImageGranularity.AllImages, image_pipeline_type=ImagePipelineType.RANDOM_CROP, image_normalization_type=ImageNormalizationType.TORCHVISION, extra_pixels=0, bn_layer_types=DEFAULT_PYTORCH_BN_LAYER_TYPES, last_layer_types=DEFAULT_PYTORCH_LAST_LAYER_TYPES, clip_images=True, reflection=True)
    +

    Function to create a DataGenerationConfig object with the specified configuration parameters.

    +
    +
    Parameters:
    +
      +
    • n_iter (int) – Number of iterations for the data generation process.

    • +
    • optimizer (Optimizer) – The optimizer to use for the data generation process.

    • +
    • data_gen_batch_size (int) – Batch size for data generation.

    • +
    • initial_lr (float) – Initial learning rate for the optimizer.

    • +
    • output_loss_multiplier (float) – Multiplier for the output loss during optimization.

    • +
    • scheduler_type (SchedulerType) – The type of scheduler to use.

    • +
    • bn_alignment_loss_type (BatchNormAlignemntLossType) – The type of BatchNorm alignment loss to use.

    • +
    • output_loss_type (OutputLossType) – The type of output loss to use.

    • +
    • data_init_type (DataInitType) – The type of data initialization to use.

    • +
    • layer_weighting_type (BNLayerWeightingType) – The type of layer weighting to use.

    • +
    • image_granularity (ImageGranularity) – The granularity of the images for optimization.

    • +
    • image_pipeline_type (ImagePipelineType) – The type of image pipeline to use.

    • +
    • image_normalization_type (ImageNormalizationType) – The type of image normalization to use.

    • +
    • extra_pixels (int) – Extra pixels to add to the input image size. Defaults to 0.

    • +
    • bn_layer_types (List) – List of BatchNorm layer types to be considered for data generation.

    • +
    • last_layer_types (List) – List of layer types to be considered for the output loss.

    • +
    • clip_images (bool) – Whether to clip images during optimization.

    • +
    • reflection (bool) – Whether to use reflection during optimization.

    • +
    +
    +
    Returns:
    +

    Data generation configuration object.

    +
    +
    Return type:
    +

    DataGenerationConfig

    +
    +

    +
    +
    +
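Examples

For instance, a minimal usage sketch (the argument values below are illustrative assumptions; any parameter that is not passed keeps its default):

>>> import model_compression_toolkit as mct
>>> config = mct.data_generation.get_pytorch_data_generation_config(n_iter=500, data_gen_batch_size=32)

The resulting configuration object can then be passed to pytorch_data_generation_experimental.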
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/get_pytroch_gptq_config.html b/docs/api/api_docs/methods/get_pytroch_gptq_config.html new file mode 100644 index 000000000..5a8c0370a --- /dev/null +++ b/docs/api/api_docs/methods/get_pytroch_gptq_config.html @@ -0,0 +1,123 @@ + + + + + + + + + + Get GradientPTQConfig for Pytorch Models — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Get GradientPTQConfig for Pytorch Models

    +
    +
    +model_compression_toolkit.gptq.get_pytorch_gptq_config(n_epochs, optimizer=Adam([torch.Tensor([])], lr=LR_DEFAULT), optimizer_rest=Adam([torch.Tensor([])], lr=LR_REST_DEFAULT), loss=multiple_tensors_mse_loss, log_function=None, use_hessian_based_weights=True, regularization_factor=REG_DEFAULT)
    +

    Create a GradientPTQConfigV2 instance for Pytorch models.

    +
    +
    Parameters:
    +
      +
    • n_epochs (int) – Number of epochs for running the representative dataset for fine-tuning.

    • +
• optimizer (Optimizer) – Pytorch optimizer to use for fine-tuning of the auxiliary variable.

    • +
    • optimizer_rest (Optimizer) – Pytorch optimizer to use for fine-tuning of the bias variable.

    • +
• loss (Callable) – Loss to use during fine-tuning. It should accept 4 lists of tensors: the 1st is the quantized tensors, the 2nd is the float tensors, the 3rd is the quantized weights and the 4th is the float weights (see the sketch at the end of the examples below).

    • +
    • log_function (Callable) – Function to log information about the gptq process.

    • +
    • use_hessian_based_weights (bool) – Whether to use Hessian-based weights for weighted average loss.

    • +
    • regularization_factor (float) – A floating point number that defines the regularization factor.

    • +
    +
    +
    Returns:
    +

    a GradientPTQConfigV2 object to use when fine-tuning the quantized model using gptq.

    +
    +
    +

    Examples

    +

Import MCT and create a GradientPTQConfigV2 to run for 5 epochs:

    +
    >>> import model_compression_toolkit as mct
    +>>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=5)
    +
    +
    +

    Other PyTorch optimizers can be passed with dummy params:

    +
    >>> import torch
    +>>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, optimizer=torch.optim.Adam([torch.Tensor(1)]))
    +
    +
    +

    The configuration can be passed to pytorch_post_training_quantization() in order to quantize a pytorch model using gptq.
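A custom loss callable can also be supplied. The following is only a sketch, assuming the 4-lists signature described in the loss parameter above (average MSE over the compared output tensors):

>>> def custom_gptq_loss(quant_outputs, float_outputs, quant_weights, float_weights):
>>>     # Hypothetical example: mean squared error averaged over the compared output tensors.
>>>     return torch.stack([torch.mean((q - f) ** 2) for q, f in zip(quant_outputs, float_outputs)]).mean()
>>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, loss=custom_gptq_loss)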

    +
    +
    Return type:
    +

    GradientPTQConfig

    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/get_target_platform_capabilities.html b/docs/api/api_docs/methods/get_target_platform_capabilities.html index b6e0f7c69..f486b41f8 100644 --- a/docs/api/api_docs/methods/get_target_platform_capabilities.html +++ b/docs/api/api_docs/methods/get_target_platform_capabilities.html @@ -2,12 +2,12 @@ - + - + - Get TargetPlatformCapabilities — MCT Documentation: ver 1.4.0 + Get TargetPlatformCapabilities — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,39 +42,38 @@

    Navigation

    -

    Get TargetPlatformCapabilities

    +

    Get TargetPlatformCapabilities

    -model_compression_toolkit.get_target_platform_capabilities(fw_name, target_platform_name)
    +model_compression_toolkit.get_target_platform_capabilities(fw_name, target_platform_name, target_platform_version=None)

    Get a TargetPlatformCapabilities by the target platform model name and the framework name. For now, it supports frameworks ‘tensorflow’ and ‘pytorch’. For both of them -the target platform model can be ‘default’,’tflite’, or ‘qnnpack’.

    +the target platform model can be ‘default’, ‘imx500’, ‘tflite’, or ‘qnnpack’.

    -
    Parameters
    +
    Parameters:
      -
    • fw_name (str) – Framework name of the TargetPlatformCapabilities.

    • -
    • target_platform_name (str) – Target platform model name the model will use for inference.

    • +
    • fw_name – Framework name of the TargetPlatformCapabilities.

    • +
    • target_platform_name – Target platform model name the model will use for inference.

    • +
    • target_platform_version – Target platform capabilities version.

    -
    Returns
    +
    Returns:

    A TargetPlatformCapabilities object that models the hardware and attaches a framework information to it.

    -
    Return type
    -

    TargetPlatformCapabilities

    +
    Return type:
    +

    TargetPlatformCapabilities

    Note

    -

    For now, fusing operators information from TargetPlatformModel -is ignored during the optimization process (fusing still occurs but using an existing mechanism). -Also, parts of OpQuantizationConfig is ignored (currently, -the quantizer type, number of bits, and quantization enable/disable information is used during the -optimization process).

    +

    For now, some fields of OpQuantizationConfig are ignored during +the optimization process (currently, the quantizer type, number of bits, and quantization enable/disable information +are in use).
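Examples

A minimal usage sketch, fetching a TPC for each framework (target_platform_version is left as None, so the default version is used):

>>> import model_compression_toolkit as mct
>>> keras_tpc = mct.get_target_platform_capabilities('tensorflow', 'default')
>>> pytorch_tpc = mct.get_target_platform_capabilities('pytorch', 'imx500')

The returned object can be passed as target_platform_capabilities to the quantization and pruning APIs.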

    @@ -94,7 +94,7 @@

    Quick search

    - +
    @@ -105,13 +105,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/methods/keras_data_generation_experimental.html b/docs/api/api_docs/methods/keras_data_generation_experimental.html new file mode 100644 index 000000000..87ea353f0 --- /dev/null +++ b/docs/api/api_docs/methods/keras_data_generation_experimental.html @@ -0,0 +1,109 @@ + + + + + + + + + + Keras Data Generation — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Keras Data Generation

    +
    +
    +model_compression_toolkit.data_generation.keras_data_generation_experimental(model, n_images, output_image_size, data_generation_config)
    +

    Function to perform data generation using the provided Keras model and data generation configuration.

    +
    +
    Parameters:
    +
      +
    • model (Model) – Keras model to generate data for.

    • +
    • n_images (int) – Number of images to generate.

    • +
    • output_image_size (Tuple) – Size of the output images.

    • +
    • data_generation_config (DataGenerationConfig) – Configuration for data generation.

    • +
    +
    +
    Returns:
    +

    Finalized list containing generated images.

    +
    +
    Return type:
    +

    List[tf.Tensor]

    +
    +

    +
    +
    +
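Examples

A minimal usage sketch (the model choice, number of images and (224, 224) output size are illustrative assumptions):

>>> import model_compression_toolkit as mct
>>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
>>> model = MobileNetV2()
>>> config = mct.data_generation.get_keras_data_generation_config()
>>> images = mct.data_generation.keras_data_generation_experimental(model, n_images=32, output_image_size=(224, 224), data_generation_config=config)

The generated images can then be used, for example, as a representative dataset for post-training quantization.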
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.html b/docs/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.html new file mode 100644 index 000000000..1c7688ad3 --- /dev/null +++ b/docs/api/api_docs/methods/keras_gradient_post_training_quantization_experimental.html @@ -0,0 +1,165 @@ + + + + + + + + + + Keras Gradient Based Post Training Quantization — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Keras Gradient Based Post Training Quantization

    +
    +
    +model_compression_toolkit.gptq.keras_gradient_post_training_quantization(in_model, representative_data_gen, gptq_config, gptq_representative_data_gen=None, target_kpi=None, core_config=CoreConfig(), fw_info=DEFAULT_KERAS_INFO, target_platform_capabilities=DEFAULT_KERAS_TPC)
    +

Quantize a trained Keras model using post-training quantization. The model is quantized using +symmetric constraint quantization thresholds (power of two). +The model is first optimized using several transformations (e.g. BatchNormalization folding to +preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are +being collected for each layer’s output (and input, depending on the quantization configuration). +For each possible bit width (per layer) a threshold is then calculated using the collected +statistics. Then, if given a mixed precision config in the core_config, using an ILP solver we find +a mixed-precision configuration, and set a bit-width for each layer. The model is then quantized +(both coefficients and activations by default). +In order to limit the maximal model’s size, a target KPI needs to be passed after weights_memory +is set (in bytes). +Then, the quantized weights are optimized using gradient based post +training quantization by comparing points between the float and quantized models, and minimizing the observed +loss.

    +
    +
    Parameters:
    +
      +
    • in_model (Model) – Keras model to quantize.

    • +
    • representative_data_gen (Callable) – Dataset used for calibration.

    • +
    • gptq_config (GradientPTQConfig) – Configuration for using gptq (e.g. optimizer).

    • +
    • gptq_representative_data_gen (Callable) – Dataset used for GPTQ training. If None defaults to representative_data_gen

    • +
    • target_kpi (KPI) – KPI object to limit the search of the mixed-precision configuration as desired.

    • +
    • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

    • +
    • fw_info (FrameworkInfo) – Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). Default Keras info

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Keras model according to.

    • +
    +
    +
    Returns:
    +

    A quantized model and information the user may need to handle the quantized model.

    +
    +
    +

    Examples

    +

    Import a Keras model:

    +
    >>> from tensorflow.keras.applications.mobilenet import MobileNet
    +>>> model = MobileNet()
    +
    +
    +

    Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): +In this example a random dataset of 10 batches each containing 4 images is used.

    +
    >>> import numpy as np
    +>>> num_calibration_batches = 10
    +>>> def repr_datagen():
    +>>>     for _ in range(num_calibration_batches):
    +>>>         yield [np.random.random((4, 224, 224, 3))]
    +
    +
    +

    Create an MCT core config, containing the quantization configuration:

    +
    >>> config = mct.core.CoreConfig()
    +
    +
    +

    If mixed precision is desired, create an MCT core config with a mixed-precision configuration, to quantize a model +with different bitwidths for different layers. +The candidates bitwidth for quantization should be defined in the target platform model:

    +
    >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=1))
    +
    +
    +

    For mixed-precision set a target KPI object: +Create a KPI object to limit our returned model’s size. Note that this value affects only coefficients +that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, +while the bias will not):

    +
    >>> kpi = mct.core.KPI(model.count_params() * 0.75)  # About 0.75 of the model size when quantized with 8 bits.
    +
    +
    +

    Create GPTQ config:

    +
    >>> gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=1)
    +
    +
    +

    Pass the model with the representative dataset generator to get a quantized model:

    +
    >>> quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(model, repr_datagen, gptq_config, target_kpi=kpi, core_config=config)
    +
    +
    +
    +
    Return type:
    +

    Tuple[Model, UserInformation]

    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/keras_kpi_data_experimental.html b/docs/api/api_docs/methods/keras_kpi_data_experimental.html new file mode 100644 index 000000000..d136d94de --- /dev/null +++ b/docs/api/api_docs/methods/keras_kpi_data_experimental.html @@ -0,0 +1,126 @@ + + + + + + + + + + Get KPI information for Keras Models — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Get KPI information for Keras Models

    +
    +
    +model_compression_toolkit.core.keras_kpi_data(in_model, representative_data_gen, core_config=CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig()), fw_info=DEFAULT_KERAS_INFO, target_platform_capabilities=KERAS_DEFAULT_TPC)
    +

Computes KPI data that can be used to calculate the desired target KPI for mixed-precision quantization. +Builds the computation graph from the given model and hardware modeling (target platform capabilities), and uses it to compute the KPI data.

    +
    +
    Parameters:
    +
      +
    • in_model (Model) – Keras model to quantize.

    • +
    • representative_data_gen (Callable) – Dataset used for calibration.

    • +
    • core_config (CoreConfig) – CoreConfig containing parameters for quantization and mixed precision of how the model should be quantized.

    • +
    • fw_info (FrameworkInfo) – Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). Default Keras info

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Keras model according to.

    • +
    +
    +
    Returns:
    +

    A KPI object with total weights parameters sum and max activation tensor.

    +
    +
    +

    Examples

    +

    Import a Keras model:

    +
    >>> from tensorflow.keras.applications.mobilenet import MobileNet
    +>>> model = MobileNet()
    +
    +
    +

    Create a random dataset generator:

    +
    >>> import numpy as np
    +>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
    +
    +
    +

    Import MCT and call for KPI data calculation:

    +
    >>> import model_compression_toolkit as mct
    +>>> kpi_data = mct.core.keras_kpi_data(model, repr_datagen)
    +
    +
    +
    +
    Return type:
    +

    KPI

    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/keras_load_quantizad_model.html b/docs/api/api_docs/methods/keras_load_quantizad_model.html new file mode 100644 index 000000000..bf405bc93 --- /dev/null +++ b/docs/api/api_docs/methods/keras_load_quantizad_model.html @@ -0,0 +1,101 @@ + + + + + + + + + + Load Quantized Keras Model — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Load Quantized Keras Model

    +
    +
    +model_compression_toolkit.keras_load_quantized_model(filepath, custom_objects=None, compile=True, options=None)
    +

This function wraps the Keras model loading and adds the trainable quantizer classes to its custom objects.

    +
    +
    Parameters:
    +
      +
    • filepath – the model file path.

    • +
    • custom_objects – Additional custom objects

    • +
    • compile – Boolean, whether to compile the model after loading.

    • +
    • options – Optional tf.saved_model.LoadOptions object that specifies options for loading from SavedModel.

    • +
    +
    +
    +

    Returns: A keras Model

    +
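Examples

A minimal usage sketch (the file path is a hypothetical placeholder for a previously saved quantized model):

>>> import model_compression_toolkit as mct
>>> quantized_model = mct.keras_load_quantized_model('quantized_model.h5')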
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/keras_post_training_quantization_experimental.html b/docs/api/api_docs/methods/keras_post_training_quantization_experimental.html new file mode 100644 index 000000000..ce57934e9 --- /dev/null +++ b/docs/api/api_docs/methods/keras_post_training_quantization_experimental.html @@ -0,0 +1,156 @@ + + + + + + + + + + Keras Post Training Quantization — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Keras Post Training Quantization

    +
    +
    +model_compression_toolkit.ptq.keras_post_training_quantization(in_model, representative_data_gen, target_kpi=None, core_config=CoreConfig(), target_platform_capabilities=DEFAULT_KERAS_TPC)
    +

Quantize a trained Keras model using post-training quantization. The model is quantized using +symmetric constraint quantization thresholds (power of two). +The model is first optimized using several transformations (e.g. BatchNormalization folding to +preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are +being collected for each layer’s output (and input, depending on the quantization configuration). +For each possible bit width (per layer) a threshold is then calculated using the collected +statistics. Then, if given a mixed precision config in the core_config, using an ILP solver we find +a mixed-precision configuration, and set a bit-width for each layer. The model is then quantized +(both coefficients and activations by default). +In order to limit the maximal model’s size, a target KPI needs to be passed after weights_memory +is set (in bytes).

    +
    +
    Parameters:
    +
      +
    • in_model (Model) – Keras model to quantize.

    • +
    • representative_data_gen (Callable) – Dataset used for calibration.

    • +
    • target_kpi (KPI) – KPI object to limit the search of the mixed-precision configuration as desired.

    • +
    • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Keras model according to.

    • +
    +
    +
    Returns:
    +

    A quantized model and information the user may need to handle the quantized model.

    +
    +
    +

    Examples

    +

    Import MCT:

    +
    >>> import model_compression_toolkit as mct
    +
    +
    +

    Import a Keras model:

    +
    >>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
    +>>> model = MobileNetV2()
    +
    +
    +

    Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): +In this example a random dataset of 10 batches each containing 4 images is used.

    +
    >>> import numpy as np
    +>>> num_calibration_batches = 10
    +>>> def repr_datagen():
    +>>>     for _ in range(num_calibration_batches):
    +>>>         yield [np.random.random((4, 224, 224, 3))]
    +
    +
    +

    Create a MCT core config, containing the quantization configuration:

    +
    >>> config = mct.core.CoreConfig()
    +
    +
    +

    If mixed precision is desired, create a MCT core config with a mixed-precision configuration, to quantize a model with different bitwidths for different layers. +The candidates bitwidth for quantization should be defined in the target platform model. +In this example we use 1 image to search mixed-precision configuration:

    +
    >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=1))
    +
    +
    +

    For mixed-precision set a target KPI object: +Create a KPI object to limit our returned model’s size. Note that this value affects only coefficients +that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, +while the bias will not):

    +
    >>> kpi = mct.core.KPI(model.count_params() * 0.75)  # About 0.75 of the model size when quantized with 8 bits.
    +
    +
    +

    Pass the model, the representative dataset generator, the configuration and the target KPI to get a +quantized model:

    +
    >>> quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(model, repr_datagen, kpi, core_config=config)
    +
    +
    +

    For more configuration options, please take a look at our API documentation.

    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/keras_pruning_experimental.html b/docs/api/api_docs/methods/keras_pruning_experimental.html new file mode 100644 index 000000000..828299501 --- /dev/null +++ b/docs/api/api_docs/methods/keras_pruning_experimental.html @@ -0,0 +1,154 @@ + + + + + + + + + + Keras Structured Pruning — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Keras Structured Pruning

    +
    +
    +model_compression_toolkit.pruning.keras_pruning_experimental(model, target_kpi, representative_data_gen, pruning_config=PruningConfig(), target_platform_capabilities=DEFAULT_KERAS_TPC)
    +

    Perform structured pruning on a Keras model to meet a specified target KPI. +This function prunes the provided model according to the target KPI by grouping and pruning +channels based on each layer’s SIMD configuration in the Target Platform Capabilities (TPC). +By default, the importance of each channel group is determined using the Label-Free Hessian +(LFH) method, assessing each channel’s sensitivity to the Hessian of the loss function. +This pruning strategy considers groups of channels together for a more hardware-friendly +architecture. The process involves analyzing the model with a representative dataset to +identify groups of channels that can be removed with minimal impact on performance.

    +

    Notice that the pruned model must be retrained to recover the compressed model’s performance.

    +
    +
    Parameters:
    +
      +
    • model (Model) – The original Keras model to be pruned.

    • +
    • target_kpi (KPI) – The target Key Performance Indicators to be achieved through pruning.

    • +
    • representative_data_gen (Callable) – A function to generate representative data for pruning analysis.

    • +
    • pruning_config (PruningConfig) – Configuration settings for the pruning process. Defaults to standard config.

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – Platform-specific constraints and capabilities. +Defaults to DEFAULT_KERAS_TPC.

    • +
    +
    +
    Returns:
    +

    A tuple containing the pruned Keras model and associated pruning information.

    +
    +
    Return type:
    +

    Tuple[Model, PruningInfo]

    +
    +
    +

    Examples

    +

    Import MCT:

    +
    >>> import model_compression_toolkit as mct
    +
    +
    +

    Import a Keras model:

    +
    >>> from tensorflow.keras.applications.resnet50 import ResNet50
    +>>> model = ResNet50()
    +
    +
    +

    Create a random dataset generator:

    +
    >>> import numpy as np
    +>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
    +
    +
    +

    Define a target KPI for pruning. +Here, we aim to reduce the memory footprint of weights by 50%, assuming the model weights +are represented in float32 data type (thus, each parameter is represented using 4 bytes):

    +
    >>> dense_nparams = sum([l.count_params() for l in model.layers])
    +>>> target_kpi = mct.KPI(weights_memory=dense_nparams * 4 * 0.5)
    +
    +
    +

    Optionally, define a pruning configuration. num_score_approximations can be passed +to configure the number of importance scores that will be calculated for each channel. +A higher value for this parameter yields more precise score approximations but also +extends the duration of the pruning process:

    +
    >>> pruning_config = mct.pruning.PruningConfig(num_score_approximations=1)
    +
    +
    +

    Perform pruning:

    +
    >>> pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(model=model, target_kpi=target_kpi, representative_data_gen=repr_datagen, pruning_config=pruning_config)
    +
    +
    +
    +
    Return type:
    +

    Tuple[Model, PruningInfo]

    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/keras_quantization_aware_training_finalize.html b/docs/api/api_docs/methods/keras_quantization_aware_training_finalize.html new file mode 100644 index 000000000..0d3fba485 --- /dev/null +++ b/docs/api/api_docs/methods/keras_quantization_aware_training_finalize.html @@ -0,0 +1,144 @@ + + + + + + + + + + Keras Quantization Aware Training Model Finalize — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Keras Quantization Aware Training Model Finalize

    +
    +
    +model_compression_toolkit.qat.keras_quantization_aware_training_finalize_experimental(in_model)
    +

    Convert a model fine-tuned by the user (Trainable quantizers) to a model with Inferable quantizers.

    +
    +
    Parameters:
    +

    in_model (Model) – Keras model to replace TrainableQuantizer with InferableQuantizer

    +
    +
    Returns:
    +

    A quantized model with Inferable quantizers

    +
    +
    +

    Examples

    +

    Import MCT:

    +
    >>> import model_compression_toolkit as mct
    +
    +
    +

    Import a Keras model:

    +
    >>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
    +>>> model = MobileNetV2()
    +
    +
    +

    Create a random dataset generator:

    +
    >>> import numpy as np
    +>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
    +
    +
    +

    Create a MCT core config, containing the quantization configuration:

    +
    >>> config = mct.core.CoreConfig()
    +
    +
    +

    If mixed precision is desired, create a MCT core config with a mixed-precision configuration, to quantize a model with different bitwidths for different layers. +The candidates bitwidth for quantization should be defined in the target platform model:

    +
    >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig())
    +
    +
    +

    For mixed-precision set a target KPI object: +Create a KPI object to limit our returned model’s size. Note that this value affects only coefficients +that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, +while the bias will not):

    +
    >>> kpi = mct.core.KPI(model.count_params() * 0.75)  # About 0.75 of the model size when quantized with 8 bits.
    +
    +
    +

    Pass the model, the representative dataset generator, the configuration and the target KPI to get a +quantized model:

    +
    >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen, kpi, core_config=config)
    +
    +
    +

    Use the quantized model for fine-tuning. For loading the model from file, use the custom_objects dictionary:

    +
    >>> quantized_model = tf.keras.models.load_model(model_file, custom_objects=custom_objects)
    +>>> quantized_model = mct.qat.keras_quantization_aware_training_finalize_experimental(quantized_model)
    +
    +
    +
    +
    Return type:
    +

    Model

    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/keras_quantization_aware_training_init.html b/docs/api/api_docs/methods/keras_quantization_aware_training_init.html new file mode 100644 index 000000000..eb840041f --- /dev/null +++ b/docs/api/api_docs/methods/keras_quantization_aware_training_init.html @@ -0,0 +1,165 @@ + + + + + + + + + + Keras Quantization Aware Training Model Init — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Keras Quantization Aware Training Model Init

    +
    +
    +model_compression_toolkit.qat.keras_quantization_aware_training_init_experimental(in_model, representative_data_gen, target_kpi=None, core_config=CoreConfig(), qat_config=QATConfig(), fw_info=DEFAULT_KERAS_INFO, target_platform_capabilities=DEFAULT_KERAS_TPC)
    +

Prepare a trained Keras model for quantization aware training. First the model quantization is optimized +with post-training quantization, then the model layers are wrapped with QuantizeWrappers. The model is +quantized using symmetric quantization thresholds (power of two). +The model is first optimized using several transformations (e.g. BatchNormalization folding to +preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are +being collected for each layer’s output (and input, depending on the quantization configuration). +For each possible bit width (per layer) a threshold is then calculated using the collected +statistics. Then, if given a mixed precision config in the core_config, using an ILP solver we find +a mixed-precision configuration, and set a bit-width for each layer. The model is built with fake_quant +nodes for quantizing activation. Weights are kept as float and are quantized online while training by the +quantization wrapper’s weight quantizer. +In order to limit the maximal model’s size, a target KPI needs to be passed after weights_memory +is set (in bytes).

    +
    +
    Parameters:
    +
      +
    • in_model (Model) – Keras model to quantize.

    • +
    • representative_data_gen (Callable) – Dataset used for initial calibration.

    • +
    • target_kpi (KPI) – KPI object to limit the search of the mixed-precision configuration as desired.

    • +
    • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

    • +
    • qat_config (QATConfig) – QAT configuration

    • +
    • fw_info (FrameworkInfo) – Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). Default Keras info

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Keras model according to.

    • +
    +
    +
    Returns:
    +

A quantized model. +User information that may be needed to handle the quantized model. +Custom-Objects dictionary for loading the saved Keras model.

    +
    +
    +

    Examples

    +

    Import MCT:

    +
    >>> import model_compression_toolkit as mct
    +
    +
    +

    Import a Keras model:

    +
    >>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
    +>>> model = MobileNetV2()
    +
    +
    +

    Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): +In this example a random dataset of 10 batches each containing 4 images is used.

    +
    >>> import numpy as np
    +>>> num_calibration_batches = 10
    +>>> def repr_datagen():
    +>>>     for _ in range(num_calibration_batches):
    +>>>         yield [np.random.random((4, 224, 224, 3))]
    +
    +
    +

    Create a MCT core config, containing the quantization configuration:

    +
    >>> config = mct.core.CoreConfig()
    +
    +
    +

    If mixed precision is desired, create a MCT core config with a mixed-precision configuration, to quantize a model with different bitwidths for different layers. +The candidates bitwidth for quantization should be defined in the target platform model:

    +
    >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig())
    +
    +
    +

    For mixed-precision set a target KPI object: +Create a KPI object to limit our returned model’s size. Note that this value affects only coefficients +that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, +while the bias will not):

    +
    >>> kpi = mct.core.KPI(model.count_params() * 0.75)  # About 0.75 of the model size when quantized with 8 bits.
    +
    +
    +

    Pass the model, the representative dataset generator, the configuration and the target KPI to get a +quantized model:

    +
    >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen, kpi, core_config=config)
    +
    +
    +

    Use the quantized model for fine-tuning. For loading the model from file, use the custom_objects dictionary:

    +
    >>> quantized_model = tf.keras.models.load_model(model_file, custom_objects=custom_objects)
    +
    +
    +

    For more configuration options, please take a look at our API documentation.

    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/pytorch_data_generation_experimental.html b/docs/api/api_docs/methods/pytorch_data_generation_experimental.html new file mode 100644 index 000000000..36690dde9 --- /dev/null +++ b/docs/api/api_docs/methods/pytorch_data_generation_experimental.html @@ -0,0 +1,109 @@ + + + + + + + + + + Pytorch Data Generation — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Pytorch Data Generation

    +
    +
    +model_compression_toolkit.data_generation.pytorch_data_generation_experimental(model, n_images, output_image_size, data_generation_config)
    +

    Function to perform data generation using the provided model and data generation configuration.

    +
    +
    Parameters:
    +
      +
    • model (Module) – PyTorch model to generate data for.

    • +
    • n_images (int) – Number of images to generate.

    • +
    • output_image_size (Tuple) – Size of the output images.

    • +
    • data_generation_config (DataGenerationConfig) – Configuration for data generation.

    • +
    +
    +
    Returns:
    +

    Finalized list containing generated images.

    +
    +
    Return type:
    +

    List[Tensor]

    +
    +

    +
    +
    +
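Examples

A minimal usage sketch (the model choice, number of images and (224, 224) output size are illustrative assumptions):

>>> import model_compression_toolkit as mct
>>> from torchvision.models import mobilenet_v2
>>> model = mobilenet_v2(pretrained=True)
>>> config = mct.data_generation.get_pytorch_data_generation_config()
>>> images = mct.data_generation.pytorch_data_generation_experimental(model, n_images=32, output_image_size=(224, 224), data_generation_config=config)

The generated images can then be used, for example, as a representative dataset for post-training quantization.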
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.html b/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.html new file mode 100644 index 000000000..e5f7c2020 --- /dev/null +++ b/docs/api/api_docs/methods/pytorch_gradient_post_training_quantization_experimental.html @@ -0,0 +1,146 @@ + + + + + + + + + + Pytorch Gradient Based Post Training Quantization — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Pytorch Gradient Based Post Training Quantization

    +
    +
    +model_compression_toolkit.gptq.pytorch_gradient_post_training_quantization(model, representative_data_gen, target_kpi=None, core_config=CoreConfig(), gptq_config=None, gptq_representative_data_gen=None, target_platform_capabilities=DEFAULT_PYTORCH_TPC)
    +

    Quantize a trained Pytorch module using post-training quantization. +By default, the module is quantized using a symmetric constraint quantization thresholds +(power of two) as defined in the default TargetPlatformCapabilities. +The module is first optimized using several transformations (e.g. BatchNormalization folding to +preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are +being collected for each layer’s output (and input, depends on the quantization configuration). +Thresholds are then being calculated using the collected statistics and the module is quantized +(both coefficients and activations by default). +If gptq_config is passed, the quantized weights are optimized using gradient based post +training quantization by comparing points between the float and quantized modules, and minimizing the +observed loss. +Then, the quantized weights are optimized using gradient based post +training quantization by comparing points between the float and quantized models, and minimizing the observed +loss.

    +
    +
    Parameters:
    +
      +
    • model (Module) – Pytorch model to quantize.

    • +
    • representative_data_gen (Callable) – Dataset used for calibration.

    • +
    • target_kpi (KPI) – KPI object to limit the search of the mixed-precision configuration as desired.

    • +
    • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

    • +
    • gptq_config (GradientPTQConfig) – Configuration for using gptq (e.g. optimizer).

    • +
    • gptq_representative_data_gen (Callable) – Dataset used for GPTQ training. If None defaults to representative_data_gen

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the PyTorch model according to.

    • +
    +
    +
    Returns:
    +

    A quantized module and information the user may need to handle the quantized module.

    +
    +
    +

    Examples

    +

    Import Model Compression Toolkit:

    +
    >>> import model_compression_toolkit as mct
    +
    +
    +

    Import a Pytorch module:

    +
    >>> from torchvision import models
    +>>> module = models.mobilenet_v2()
    +
    +
    +

    Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): +In this example a random dataset of 10 batches each containing 4 images is used.

    +
    >>> import numpy as np
    +>>> num_calibration_batches = 10
    +>>> def repr_datagen():
    +>>>     for _ in range(num_calibration_batches):
    +>>>         yield [np.random.random((4, 3, 224, 224))]
    +
    +
    +

    Create MCT core configurations with number of calibration iterations set to 1:

    +
    >>> config = mct.core.CoreConfig()
    +
    +
    +

    Pass the module, the representative dataset generator and the configuration (optional) to get a quantized module

    +
    >>> quantized_module, quantization_info = mct.gptq.pytorch_gradient_post_training_quantization(module, repr_datagen, core_config=config, gptq_config=gptq_conf)
    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/pytorch_kpi_data_experimental.html b/docs/api/api_docs/methods/pytorch_kpi_data_experimental.html new file mode 100644 index 000000000..e1c6441d3 --- /dev/null +++ b/docs/api/api_docs/methods/pytorch_kpi_data_experimental.html @@ -0,0 +1,126 @@ + + + + + + + + + + Get KPI information for PyTorch Models — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Get KPI information for PyTorch Models

    +
    +
    +model_compression_toolkit.core.pytorch_kpi_data(in_model, representative_data_gen, core_config=CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig()), fw_info=DEFAULT_PYTORCH_INFO, target_platform_capabilities=PYTORCH_DEFAULT_TPC)
    +

    Computes KPI data that can be used to calculate the desired target KPI for mixed-precision quantization. +Builds the computation graph from the given model and target platform capabilities, and uses it to compute the KPI data.

    +
    +
    Parameters:
    +
      +
    • in_model (Model) – PyTorch model to quantize.

    • +
    • representative_data_gen (Callable) – Dataset used for calibration.

    • +
    • core_config (CoreConfig) – CoreConfig containing parameters for quantization and mixed precision

    • +
    • fw_info (FrameworkInfo) – Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). Default PyTorch info

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the PyTorch model according to.

    • +
    +
    +
    Returns:
    +

    A KPI object with total weights parameters sum and max activation tensor.

    +
    +
    +

    Examples

    +

    Import a Pytorch model:

    +
    >>> from torchvision import models
    +>>> module = models.mobilenet_v2()
    +
    +
    +

    Create a random dataset generator:

    +
    >>> import numpy as np
    +>>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
    +
    +
    +

    Import mct and call for KPI data calculation:

    +
    >>> import model_compression_toolkit as mct
    +>>> kpi_data = mct.core.pytorch_kpi_data(module, repr_datagen)
    +
    +
    +
    +
    Return type:
    +

    KPI

    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/pytorch_post_training_quantization_experimental.html b/docs/api/api_docs/methods/pytorch_post_training_quantization_experimental.html new file mode 100644 index 000000000..630598efb --- /dev/null +++ b/docs/api/api_docs/methods/pytorch_post_training_quantization_experimental.html @@ -0,0 +1,135 @@ + + + + + + + + + + Pytorch Post Training Quantization — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Pytorch Post Training Quantization

    +
    +
    +model_compression_toolkit.ptq.pytorch_post_training_quantization(in_module, representative_data_gen, target_kpi=None, core_config=CoreConfig(), target_platform_capabilities=DEFAULT_PYTORCH_TPC)
    +

Quantize a trained Pytorch module using post-training quantization. +By default, the module is quantized using symmetric constraint quantization thresholds +(power of two) as defined in the default TargetPlatformCapabilities. +The module is first optimized using several transformations (e.g. BatchNormalization folding to +preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are +being collected for each layer’s output (and input, depending on the quantization configuration). +Thresholds are then calculated using the collected statistics and the module is quantized +(both coefficients and activations by default). +If gptq_config is passed, the quantized weights are optimized using gradient based post +training quantization by comparing points between the float and quantized modules, and minimizing the +observed loss.

    +
    +
    Parameters:
    +
      +
    • in_module (Module) – Pytorch module to quantize.

    • +
    • representative_data_gen (Callable) – Dataset used for calibration.

    • +
    • target_kpi (KPI) – KPI object to limit the search of the mixed-precision configuration as desired.

    • +
    • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the PyTorch model according to.

    • +
    +
    +
    Returns:
    +

    A quantized module and information the user may need to handle the quantized module.

    +
    +
    +

    Examples

    +

    Import a Pytorch module:

    +
    >>> from torchvision import models
    +>>> module = models.mobilenet_v2()
    +
    +
    +

    Create a random dataset generator, for required number of calibration iterations (num_calibration_batches): +In this example a random dataset of 10 batches each containing 4 images is used.

    +
    >>> import numpy as np
    +>>> num_calibration_batches = 10
    +>>> def repr_datagen():
    +>>>     for _ in range(num_calibration_batches):
    +>>>         yield [np.random.random((4, 3, 224, 224))]
    +
    +
    +

Import MCT and pass the module with the representative dataset generator to get a quantized module. +Set the number of calibration iterations to 1:

    +
    >>> import model_compression_toolkit as mct
    +>>> quantized_module, quantization_info = mct.ptq.pytorch_post_training_quantization(module, repr_datagen)
    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/pytorch_pruning_experimental.html b/docs/api/api_docs/methods/pytorch_pruning_experimental.html new file mode 100644 index 000000000..d3efe563f --- /dev/null +++ b/docs/api/api_docs/methods/pytorch_pruning_experimental.html @@ -0,0 +1,158 @@ + + + + + + + + + + Pytorch Structured Pruning — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    Pytorch Structured Pruning

    +
    +
    +model_compression_toolkit.pruning.pytorch_pruning_experimental(model, target_kpi, representative_data_gen, pruning_config=PruningConfig(), target_platform_capabilities=DEFAULT_PYOTRCH_TPC)
    +

    Perform structured pruning on a Pytorch model to meet a specified target KPI. +This function prunes the provided model according to the target KPI by grouping and pruning +channels based on each layer’s SIMD configuration in the Target Platform Capabilities (TPC). +By default, the importance of each channel group is determined using the Label-Free Hessian +(LFH) method, assessing each channel’s sensitivity to the Hessian of the loss function. +This pruning strategy considers groups of channels together for a more hardware-friendly +architecture. The process involves analyzing the model with a representative dataset to +identify groups of channels that can be removed with minimal impact on performance.

    +

    Notice that the pruned model must be retrained to recover the compressed model’s performance.

    +
    +
    Parameters:
    +
      +
    • model (Module) – The PyTorch model to be pruned.

    • +
    • target_kpi (KPI) – Key Performance Indicators specifying the pruning targets.

    • +
    • representative_data_gen (Callable) – A function to generate representative data for pruning analysis.

    • +
    • pruning_config (PruningConfig) – Configuration settings for the pruning process. Defaults to standard config.

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – Platform-specific constraints and capabilities. +Defaults to DEFAULT_PYTORCH_TPC.

    • +
    +
    +
    Returns:
    +

    A tuple containing the pruned Pytorch model and associated pruning information.

    +
    +
    Return type:
    +

    Tuple[Model, PruningInfo]

    +
    +
    +
    +

    Note

    +

    The pruned model should be fine-tuned or retrained to recover or improve its performance post-pruning.

    +
    +

    Examples

    +

    Import MCT:

    +
    >>> import model_compression_toolkit as mct
    +
    +
    +

    Import a Pytorch model:

    +
    >>> from torchvision.models import resnet50, ResNet50_Weights
    +>>> model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
    +
    +
    +

    Create a random dataset generator:

    +
    >>> import numpy as np
    +>>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
    +
    +
    +

    Define a target KPI for pruning. +Here, we aim to reduce the memory footprint of weights by 50%, assuming the model weights +are represented in float32 data type (thus, each parameter is represented using 4 bytes):

    +
    >>> dense_nparams = sum(p.numel() for p in model.state_dict().values())
    +>>> target_kpi = mct.KPI(weights_memory=dense_nparams * 4 * 0.5)
    +
    +
    +

    Optionally, define a pruning configuration. num_score_approximations can be passed +to configure the number of importance scores that will be calculated for each channel. +A higher value for this parameter yields more precise score approximations but also +extends the duration of the pruning process:

    +
    >>> pruning_config = mct.pruning.PruningConfig(num_score_approximations=1)
    +
    +
    +

    Perform pruning:

    +
    >>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model, target_kpi=target_kpi, representative_data_gen=repr_datagen, pruning_config=pruning_config)
    +
    +
    +
    +
    Return type:
    +

    Tuple[Module, PruningInfo]

    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize.html b/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize.html new file mode 100644 index 000000000..0dd0f11e3 --- /dev/null +++ b/docs/api/api_docs/methods/pytorch_quantization_aware_training_finalize.html @@ -0,0 +1,127 @@ + + + + + + + + + + PyTorch Quantization Aware Training Model Finalize — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    PyTorch Quantization Aware Training Model Finalize

    +
    +
    +model_compression_toolkit.qat.pytorch_quantization_aware_training_finalize_experimental(in_model)
    +

    Convert a model fine-tuned by the user to a network with QuantizeWrappers containing InferableQuantizers, which quantize both the layers’ weights and outputs.

    +
    +
    Parameters:
    +

    in_model (Model) – Pytorch model to remove QuantizeWrappers.

    +
    +
    Returns:
    +

    A quantized model with QuantizeWrappers and InferableQuantizers.

    +
    +
    +

    Examples

    +

    Import MCT:

    +
    >>> import model_compression_toolkit as mct
    +
    +
    +

    Import a Pytorch model:

    +
    >>> from torchvision.models import mobilenet_v2
    +>>> model = mobilenet_v2(pretrained=True)
    +
    +
    +

    Create a random dataset generator:

    +
    >>> import numpy as np
    +>>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
    +
    +
    +

    Create a MCT core config, containing the quantization configuration:

    +
    >>> config = mct.core.CoreConfig()
    +
    +
    +

    Pass the model, the representative dataset generator, the configuration and the target KPI to get a quantized model:

    +
    >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config)
    +
    +
    +

    Use the quantized model for fine-tuning. Finally, remove the quantizer wrappers and keep a quantized model ready for inference.

    +
    >>> quantized_model = mct.qat.pytorch_quantization_aware_training_finalize_experimental(quantized_model)
    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/pytorch_quantization_aware_training_init.html b/docs/api/api_docs/methods/pytorch_quantization_aware_training_init.html new file mode 100644 index 000000000..f915beca8 --- /dev/null +++ b/docs/api/api_docs/methods/pytorch_quantization_aware_training_init.html @@ -0,0 +1,152 @@ + + + + + + + + + + PyTorch Quantization Aware Training Model Init — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    PyTorch Quantization Aware Training Model Init

    +
    +
    +model_compression_toolkit.qat.pytorch_quantization_aware_training_init_experimental(in_model, representative_data_gen, target_kpi=None, core_config=CoreConfig(), qat_config=QATConfig(), fw_info=DEFAULT_PYTORCH_INFO, target_platform_capabilities=DEFAULT_PYTORCH_TPC)
    +

    Prepare a trained Pytorch model for quantization aware training. First the model quantization is optimized with post-training quantization, then the model layers are wrapped with QuantizeWrappers. The model is quantized using symmetric quantization thresholds (power of two). The model is first optimized using several transformations (e.g. BatchNormalization folding to preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are collected for each layer’s output (and input, depending on the quantization configuration). For each possible bit width (per layer) a threshold is then calculated using the collected statistics. Then, if a mixed-precision config is given in the core_config, an ILP solver is used to find a mixed-precision configuration and set a bit-width for each layer. The model is built with fake_quant nodes for quantizing activations. Weights are kept as float and are quantized online while training by the quantization wrapper’s weight quantizer. In order to limit the maximal model size, a target KPI needs to be passed with weights_memory set (in bytes).

    +
    +
    Parameters:
    +
      +
    • in_model (Model) – Pytorch model to quantize.

    • +
    • representative_data_gen (Callable) – Dataset used for initial calibration.

    • +
    • target_kpi (KPI) – KPI object to limit the search of the mixed-precision configuration as desired.

    • +
    • core_config (CoreConfig) – Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.

    • +
    • qat_config (QATConfig) – QAT configuration

    • +
    • fw_info (FrameworkInfo) – Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). Default Pytorch info

    • +
    • target_platform_capabilities (TargetPlatformCapabilities) – TargetPlatformCapabilities to optimize the Pytorch model according to.

    • +
    +
    +
    Returns:
    +

    A quantized model. User information that may be needed to handle the quantized model.

    +
    +
    +

    Examples

    +

    Import MCT:

    +
    >>> import model_compression_toolkit as mct
    +
    +
    +

    Import a Pytorch model:

    +
    >>> from torchvision.models import mobilenet_v2
    +>>> model = mobilenet_v2(pretrained=True)
    +
    +
    +
    +

    Create a random dataset generator for the required number of calibration iterations (num_calibration_batches). In this example, a random dataset of 10 batches, each containing 4 images, is used:

    +
    >>> import numpy as np
    +>>> num_calibration_batches = 10
    +>>> def repr_datagen():
    +>>>     for _ in range(num_calibration_batches):
    +>>>         yield [np.random.random((4, 3, 224, 224))]
    +
    +
    +
    +

    Create a MCT core config, containing the quantization configuration:

    +
    >>> config = mct.core.CoreConfig()
    +
    +
    +

    Pass the model, the representative dataset generator, the configuration and the target KPI to get a quantized model. Now the model contains quantizer wrappers for fine-tuning the weights:

    +
    >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config)
    +
    +
    +

    For more configuration options, please take a look at our API documentation.

    +
    +
    +
    + +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/methods/set_logger_path.html b/docs/api/api_docs/methods/set_logger_path.html index af9c88cb0..8f6d40611 100644 --- a/docs/api/api_docs/methods/set_logger_path.html +++ b/docs/api/api_docs/methods/set_logger_path.html @@ -2,12 +2,12 @@ - + - + - Enable a Logger — MCT Documentation: ver 1.4.0 + Enable a Logger — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,16 +42,16 @@

    Navigation

    -

    Enable a Logger

    +

    Enable a Logger

    model_compression_toolkit.set_log_folder(folder, level=logging.INFO)

    Set a directory path for saving a log file.

    -
    Parameters
    +
    Parameters:
      -
    • folder (str) – Folder path to save the log file.

    • -
    • level (int) – Level of verbosity to set to the logger.

    • +
    • folder – Folder path to save the log file.

    • +
    • level – Level of verbosity to set to the logger.
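    For example, a minimal sketch (the folder path is illustrative):

    >>> import logging
    >>> import model_compression_toolkit as mct
    >>> mct.set_log_folder('./mct_logs', level=logging.INFO)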

    @@ -74,7 +75,7 @@

    Quick search

    - +
    @@ -85,13 +86,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/modules/core_config.html b/docs/api/api_docs/modules/core_config.html new file mode 100644 index 000000000..37ef02de7 --- /dev/null +++ b/docs/api/api_docs/modules/core_config.html @@ -0,0 +1,113 @@ + + + + + + + + + + core_config Module — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    core_config Module

    +
    +

    CoreConfig

    +

    Class to configure the optimization process of the model:

    +
    +
    +class model_compression_toolkit.core.CoreConfig(quantization_config=QuantizationConfig(), mixed_precision_config=None, debug_config=DebugConfig())
    +

    A class that holds the configuration classes of the MCT core.
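    For example, a minimal sketch of building a CoreConfig (the sub-configuration values shown are illustrative):

    >>> import model_compression_toolkit as mct
    >>> core_config = mct.core.CoreConfig(quantization_config=mct.core.QuantizationConfig(),
    >>>                                   mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(),
    >>>                                   debug_config=mct.core.DebugConfig())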

    +
    +
    Parameters:
    +
    +
    +
    +
    + +
    +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/modules/debug_config.html b/docs/api/api_docs/modules/debug_config.html new file mode 100644 index 000000000..40bfbdcc9 --- /dev/null +++ b/docs/api/api_docs/modules/debug_config.html @@ -0,0 +1,113 @@ + + + + + + + + + + debug_config Module — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    debug_config Module

    +
    +

    DebugConfig

    +

    Class to configure parameters for debugging purposes:

    +
    +
    +class model_compression_toolkit.core.DebugConfig(analyze_similarity=False, network_editor=[])
    +

    A class for MCT core debug information.

    +
    +
    Parameters:
    +
      +
    • analyze_similarity (bool) – Whether to plot similarity figures within TensorBoard (when logger is enabled) or not. Can be used to pinpoint problematic layers in the quantization process.

    • +
    • network_editor (List[EditRule]) – A list of rules and actions to edit the network for quantization.
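    For example, a minimal sketch of enabling similarity analysis for debugging (the flag values are illustrative):

    >>> import model_compression_toolkit as mct
    >>> debug_config = mct.core.DebugConfig(analyze_similarity=True)
    >>> core_config = mct.core.CoreConfig(debug_config=debug_config)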

    • +
    +
    +
    +
    + +
    +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/modules/exporter.html b/docs/api/api_docs/modules/exporter.html new file mode 100644 index 000000000..f7f1c37d5 --- /dev/null +++ b/docs/api/api_docs/modules/exporter.html @@ -0,0 +1,455 @@ + + + + + + + + + + exporter Module — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    exporter Module

    +

    Allows exporting a quantized model in different serialization formats and quantization formats. For more details about the export formats and options, please refer to the project’s GitHub README file. If you have any questions or issues, please open an issue in this GitHub repository.

    +
    +

    QuantizationFormat

    +
    +
    +class model_compression_toolkit.exporter.QuantizationFormat(value)
    +

    Specify which quantization format to use for exporting a quantized model.

    +

    FAKELY_QUANT - Weights and activations are quantized but represented using float data type.

    +

    INT8 - Weights and activations are represented using 8-bit integer data type.

    +

    MCTQ - Weights and activations are quantized using mct_quantizers custom quantizers.

    +
    + +
    +
    +

    KerasExportSerializationFormat

    +

    Select the serialization format for exporting a quantized Keras model.

    +
    +
    +class model_compression_toolkit.exporter.KerasExportSerializationFormat(value)
    +

    Specify which serialization format to use for exporting a quantized Keras model.

    +

    KERAS - .keras file format

    +

    TFLITE - .tflite file format

    +
    + +
    +
    +

    keras_export_model

    +

    Allows exporting a Keras model that was quantized via MCT.

    +
    +
    +class model_compression_toolkit.exporter.keras_export_model(model, save_model_path, is_layer_exportable_fn=is_keras_layer_exportable, serialization_format=KerasExportSerializationFormat.KERAS, quantization_format=QuantizationFormat.MCTQ)
    +

    Export a Keras quantized model to an h5 or tflite model. The model will be saved to the path in save_model_path. keras_export_model supports the combination of QuantizationFormat.FAKELY_QUANT (where weights and activations are float fakely-quantized values) and KerasExportSerializationFormat.KERAS_H5 (where the model will be saved to an h5 model), or the combination of KerasExportSerializationFormat.TFLITE (where the model will be saved to a tflite model) with QuantizationFormat.FAKELY_QUANT or QuantizationFormat.INT8 (where weights and activations are represented using 8-bit integers).

    +
    +
    Parameters:
    +
      +
    • model – Model to export.

    • +
    • save_model_path – Path to save the model.

    • +
    • is_layer_exportable_fn – Callable to check whether a layer can be exported or not.

    • +
    • serialization_format – Format to export the model according to (by default KerasExportSerializationFormat.KERAS_H5).

    • +
    • quantization_format – Format of how quantizers are exported (fakely-quant, int8, MCTQ quantizers).

    • +
    +
    +
    Returns:
    +

    Custom objects dictionary needed to load the model.

    +
    +
    Return type:
    +

    Dict[str, type]

    +
    +
    +
    + +
    +
    +

    Keras Tutorial

    +

    To export a TensorFlow model as a quantized model, it is necessary to first apply quantization to the model using MCT:

    +
    ! pip install -q mct-nightly
    +
    +
    +
    import numpy as np
    +from keras.applications import ResNet50
    +import model_compression_toolkit as mct
    +
    +# Create a model
    +float_model = ResNet50()
    +# Quantize the model.
    +# Notice that here the representative dataset is random for demonstration only.
    +quantized_exportable_model, _ = mct.ptq.keras_post_training_quantization(float_model,
    +                                                                         representative_data_gen=lambda: [np.random.random((1, 224, 224, 3))])
    +
    +
    +
    +

    keras serialization format

    +

    The model will be exported as a TensorFlow .keras model where weights and activations are quantized but represented using a float32 dtype. Two optional quantization formats are available: MCTQ and FAKELY_QUANT.

    +
    +
    +

    MCTQ

    +

    By default, mct.exporter.keras_export_model will export the quantized Keras model to a .keras model with custom quantizers from the mct_quantizers module.

    +
    import tempfile
    +
    +# Path of exported model
    +_, keras_file_path = tempfile.mkstemp('.keras')
    +
    +# Export a keras model with mctq custom quantizers.
    +mct.exporter.keras_export_model(model=quantized_exportable_model,
    +                                save_model_path=keras_file_path)
    +
    +
    +

    Notice that the model has the same size as the quantized exportable model as weights data types are float.

    +
    +
    +

    Fakely-Quantized in Keras

    +
    # Path of exported model
    +_, keras_file_path = tempfile.mkstemp('.keras')
    +
    +# Use mode KerasExportSerializationFormat.KERAS for a .keras model
    +# and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights
    +# and activations.
    +mct.exporter.keras_export_model(model=quantized_exportable_model,
    +                                save_model_path=keras_file_path,
    +                                quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)
    +
    +
    +

    Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are float.

    +
    +
    +

    TFLite

    +

    The tflite serialization format exports in two quantization formats: INT8 and FAKELY_QUANT.

    +
    +
    +

    INT8 TFLite

    +

    The model will be exported as a tflite model where weights and activations are represented as 8-bit integers.

    +
    import tempfile
    +
    +# Path of exported model
    +_, tflite_file_path = tempfile.mkstemp('.tflite')
    +
    +# Use mode KerasExportSerializationFormat.TFLITE for a tflite model and QuantizationFormat.INT8.
    +mct.exporter.keras_export_model(model=quantized_exportable_model,
    +                                save_model_path=tflite_file_path,
    +                                serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE,
    +                                quantization_format=mct.exporter.QuantizationFormat.INT8)
    +
    +
    +

    Compare size of float and quantized model:

    +
    import os
    +
    +# Save float model to measure its size
    +_, float_file_path = tempfile.mkstemp('.keras')
    +float_model.save(float_file_path)
    +
    +print("Float model in Mb:", os.path.getsize(float_file_path) / float(2 ** 20))
    +print("Quantized model in Mb:", os.path.getsize(tflite_file_path) / float(2 ** 20))
    +print(f'Compression ratio: {os.path.getsize(float_file_path) / os.path.getsize(tflite_file_path)}')
    +
    +
    +
    +
    +

    Fakely-Quantized TFLite

    +

    The model will be exported as a tflite model where weights and activations are quantized but represented with a float data type.

    +
    +
    +

    Usage Example

    +
    # Path of exported model
    +_, tflite_file_path = tempfile.mkstemp('.tflite')
    +
    +# Use mode KerasExportSerializationFormat.TFLITE for tflite model and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights
    +# and activations.
    +mct.exporter.keras_export_model(model=quantized_exportable_model,
    +                                save_model_path=tflite_file_path,
    +                                serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE,
    +                                quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)
    +
    +
    +

    Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are float.

    +
    +
    +
    +

    PytorchExportSerializationFormat

    +

    Select the serialization format for exporting a quantized Pytorch model.

    +
    +
    +class model_compression_toolkit.exporter.PytorchExportSerializationFormat(value)
    +

    Specify which serialization format to use for exporting a quantized Pytorch model.

    +

    TORCHSCRIPT - torchscript format

    +

    ONNX - onnx format

    +
    + +
    +
    +

    pytorch_export_model

    +

    Allows exporting a Pytorch model that was quantized via MCT.

    +
    +
    +class model_compression_toolkit.exporter.pytorch_export_model(model, save_model_path, repr_dataset, is_layer_exportable_fn=is_pytorch_layer_exportable, serialization_format=PytorchExportSerializationFormat.ONNX, quantization_format=QuantizationFormat.MCTQ, onnx_opset_version=DEFAULT_ONNX_OPSET_VERSION)
    +

    Export a PyTorch quantized model to a torchscript or onnx model. The model will be saved to the path in save_model_path. Currently, pytorch_export_model supports only QuantizationFormat.FAKELY_QUANT (where weights and activations are float fakely-quantized values) and PytorchExportSerializationFormat.TORCHSCRIPT (where the model will be saved to a TorchScript model) or PytorchExportSerializationFormat.ONNX (where the model will be saved to an ONNX model).

    +
    +
    Parameters:
    +
      +
    • model – Model to export.

    • +
    • save_model_path – Path to save the model.

    • +
    • repr_dataset – Representative dataset for tracing the pytorch model (mandatory for exporting it).

    • +
    • is_layer_exportable_fn – Callable to check whether a layer can be exported or not.

    • +
    • serialization_format – Format to export the model according to (by default PytorchExportSerializationFormat.ONNX).

    • +
    • quantization_format – Format of how quantizers are exported (fakely-quant, int8, MCTQ quantizers).

    • +
    • onnx_opset_version – ONNX opset version to use for exported ONNX model.

    • +
    +
    +
    Return type:
    +

    None

    +
    +
    +
    + +
    +
    +

    Pytorch Tutorial

    +

    To export a Pytorch model as a quantized model, it is necessary to first apply quantization to the model using MCT:

    +
    ! pip install -q mct-nightly
    +
    +
    +

    In order to export your quantized model to ONNX format and use it for inference, some additional packages are needed. Notice that this is needed only for models exported to ONNX format, so this part can be skipped if that is not planned:

    +
    ! pip install -q onnx onnxruntime onnxruntime-extensions
    +
    +
    +

    Now, let’s start the export demonstration by quantizing the model using MCT:

    +
    import model_compression_toolkit as mct
    +import numpy as np
    +import torch
    +from torchvision.models.mobilenetv2 import mobilenet_v2
    +
    +# Create a model
    +float_model = mobilenet_v2()
    +
    +
    +# Notice that here the representative dataset is random for demonstration only.
    +def representative_data_gen():
    +    yield [np.random.random((1, 3, 224, 224))]
    +
    +
    +quantized_exportable_model, _ = mct.ptq.pytorch_post_training_quantization(float_model, representative_data_gen=representative_data_gen)
    +
    +
    +
    +

    ONNX

    +

    The model will be exported in ONNX format where weights and activations are represented as float. Notice that onnx should be installed in order to export the model to ONNX.

    +

    There are two optional formats to choose: MCTQ or FAKELY_QUANT.

    +
    +
    +

    MCTQ Quantization Format

    +

    By default, mct.exporter.pytorch_export_model will export the quantized pytorch model to an ONNX model with custom quantizers from the mct_quantizers module.

    +
    # Path of exported model
    +onnx_file_path = 'model_format_onnx_mctq.onnx'
    +
    +# Export ONNX model with mctq quantizers.
    +mct.exporter.pytorch_export_model(model=quantized_exportable_model,
    +                                  save_model_path=onnx_file_path,
    +                                  repr_dataset=representative_data_gen)
    +
    +
    +

    Notice that the model has the same size as the quantized exportable model as weights data types are float.

    +
    +
    +

    ONNX opset version

    +

    By default, the ONNX opset version used is 15, but this can be changed using onnx_opset_version:

    +
    # Export ONNX model with mctq quantizers.
    +mct.exporter.pytorch_export_model(model=quantized_exportable_model,
    +                                  save_model_path=onnx_file_path,
    +                                  repr_dataset=representative_data_gen,
    +                                  onnx_opset_version=16)
    +
    +
    +
    +

    +
    +
    +
    +

    Use exported model for inference

    +

    To load and run inference with the exported model, which was exported to an ONNX file in MCTQ format, we will use the mct_quantizers method get_ort_session_options during onnxruntime session creation. Notice that inference on models exported in this format is slower and suffers from longer latency. However, inference of these models on IMX500 will not suffer from this issue.

    +
    import mct_quantizers as mctq
    +import onnxruntime as ort
    +
    +sess = ort.InferenceSession(onnx_file_path,
    +                            mctq.get_ort_session_options(),
    +                            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    +
    +_input_data = next(representative_data_gen())[0].astype(np.float32)
    +_model_output_name = sess.get_outputs()[0].name
    +_model_input_name = sess.get_inputs()[0].name
    +
    +# Run inference
    +predictions = sess.run([_model_output_name], {_model_input_name: _input_data})
    +
    +
    +
    +

    +
    +
    +
    +

    Fakely-Quantized in Pytorch

    +

    To export a fakely-quantized model, use QuantizationFormat.FAKELY_QUANT:

    +
    import tempfile
    +
    +# Path of exported model
    +_, onnx_file_path = tempfile.mkstemp('.onnx')
    +
    +# Use QuantizationFormat.FAKELY_QUANT for fakely-quantized weights and activations.
    +mct.exporter.pytorch_export_model(model=quantized_exportable_model,
    +                                  save_model_path=onnx_file_path,
    +                                  repr_dataset=representative_data_gen,
    +                                  quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)
    +
    +
    +

    Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are float.

    +
    +
    +

    TorchScript

    +

    The model will be exported in TorchScript format where weights and activations are quantized but represented as float (fakely quant).

    +
    # Path of exported model
    +_, torchscript_file_path = tempfile.mkstemp('.pt')
    +
    +
    +# Use mode PytorchExportSerializationFormat.TORCHSCRIPT for a torchscript model
    +# and QuantizationFormat.FAKELY_QUANT for fakely-quantized weights and activations.
    +mct.exporter.pytorch_export_model(model=quantized_exportable_model,
    +                                  save_model_path=torchscript_file_path,
    +                                  repr_dataset=representative_data_gen,
    +                                  serialization_format=mct.exporter.PytorchExportSerializationFormat.TORCHSCRIPT,
    +                                  quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)
    +
    +
    +

    Notice that the fakely-quantized model has the same size as the quantized exportable model as weights data types are float.

    +
    +
    +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/modules/layer_filters.html b/docs/api/api_docs/modules/layer_filters.html index 704e6be88..3a94b765c 100644 --- a/docs/api/api_docs/modules/layer_filters.html +++ b/docs/api/api_docs/modules/layer_filters.html @@ -2,12 +2,12 @@ - + - + - Layer Attributes Filters — MCT Documentation: ver 1.4.0 + Layer Attributes Filters — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,17 +42,17 @@

    Navigation

    -

    Layer Attributes Filters

    +

    Layer Attributes Filters

    In order to create LayerFilterParams, which is needed to filter layers by their attributes, one may use the following filters to check whether a layer configuration matches the created LayerFilterParams:
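    For example, a minimal sketch, assuming Eq and LayerFilterParams are accessible through mct.target_platform as documented on this page (the layer and attribute values are illustrative):

    >>> import model_compression_toolkit as mct
    >>> from tensorflow.keras.layers import ReLU
    >>> # Match ReLU layers whose 'negative_slope' attribute equals 0.0
    >>> relu_filter = mct.target_platform.LayerFilterParams(ReLU, mct.target_platform.Eq('negative_slope', 0.0))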

    -

    Attribute Filters

    +

    Attribute Filters

    class model_compression_toolkit.target_platform.Eq(attr, value)

    Filter configurations such that it matches configurations that have an attribute with a value that equals the value that Eq holds.

    -
    Parameters
    +
    Parameters:
    - +
    @@ -193,13 +194,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/modules/mixed_precision_quantization_config.html b/docs/api/api_docs/modules/mixed_precision_quantization_config.html index 74980341c..e37d5079e 100644 --- a/docs/api/api_docs/modules/mixed_precision_quantization_config.html +++ b/docs/api/api_docs/modules/mixed_precision_quantization_config.html @@ -42,13 +42,13 @@

    Navigation

    -

    mixed_precision_quantization_config Module

    +

    mixed_precision_quantization_config Module

    KPI

    Object to configure resources to use when searching for a mixed-precision configuration for a model:

    -
    -class model_compression_toolkit.KPI(weights_memory=np.inf, activation_memory=np.inf, total_memory=np.inf, bops=np.inf)
    +
    +class model_compression_toolkit.core.KPI(weights_memory=np.inf, activation_memory=np.inf, total_memory=np.inf, bops=np.inf)

    Class to represent measurements of performance.

    Parameters:
    @@ -66,24 +66,25 @@

    KPI

    -
    -

    MixedPrecisionQuantizationConfig

    +
    +

    MixedPrecisionQuantizationConfigV2

    Class to configure the quantization process of the model when quantizing in mixed-precision:

    -
    -class model_compression_toolkit.MixedPrecisionQuantizationConfig(qc=DEFAULTCONFIG, compute_distance_fn=compute_mse, distance_weighting_method=get_average_weights, num_of_images=32, configuration_overwrite=None, num_interest_points_factor=1.0)
    -

    Class to wrap all different parameters the library quantize the input model according to. -Unlike QuantizationConfig, number of bits for quantization is a list of possible bit widths to -support mixed-precision model quantization.

    +
    +class model_compression_toolkit.core.MixedPrecisionQuantizationConfig(compute_distance_fn=None, distance_weighting_method=get_average_weights, num_of_images=32, configuration_overwrite=None, num_interest_points_factor=1.0, use_hessian_based_scores=True, norm_scores=True, refine_mp_solution=True, metric_normalization_threshold=1e10)
    +

    Class with mixed precision parameters to quantize the input model.

    Parameters:
      -
    • qc (QuantizationConfig) – QuantizationConfig object containing parameters of how the model should be quantized.

    • compute_distance_fn (Callable) – Function to compute a distance between two tensors.

    • distance_weighting_method (Callable) – Function to use when weighting the distances among different layers when computing the sensitivity metric.

    • num_of_images (int) – Number of images to use to evaluate the sensitivity of a mixed-precision model comparing to the float model.

    • configuration_overwrite (List[int]) – A list of integers that enables overwrite of mixed precision with a predefined one.

    • -
    • num_interest_points_factor – A multiplication factor between zero and one (represents percentage) to reduce the number of interest points used to calculate the distance metric.

    • +
    • num_interest_points_factor (float) – A multiplication factor between zero and one (represents percentage) to reduce the number of interest points used to calculate the distance metric.

    • +
    • use_hessian_based_scores (bool) – Whether to use Hessian-based scores for weighted average distance metric computation.

    • +
    • norm_scores (bool) – Whether to normalize the returned scores for the weighted distance metric (to get values between 0 and 1).

    • +
    • refine_mp_solution (bool) – Whether to try to improve the final mixed-precision configuration using a greedy algorithm that searches layers to increase their bit-width, or not.

    • +
    • metric_normalization_threshold (float) – A threshold for checking the mixed precision distance metric values. In case of values larger than this threshold, the metric will be scaled to prevent numerical issues.
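    For example, a minimal sketch of a mixed-precision setup (the number of images and the memory budget are illustrative):

    >>> import model_compression_toolkit as mct
    >>> mp_config = mct.core.MixedPrecisionQuantizationConfig(num_of_images=16)
    >>> core_config = mct.core.CoreConfig(mixed_precision_config=mp_config)
    >>> # A KPI can then bound the weights memory (in bytes) for the mixed-precision search
    >>> target_kpi = mct.core.KPI(weights_memory=10 * (2 ** 20))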

    @@ -104,7 +105,7 @@

    Table of Contents

    diff --git a/docs/api/api_docs/modules/network_editor.html b/docs/api/api_docs/modules/network_editor.html index 2b01b1349..f34dfba5f 100644 --- a/docs/api/api_docs/modules/network_editor.html +++ b/docs/api/api_docs/modules/network_editor.html @@ -2,12 +2,12 @@ - + - + - network_editor Module — MCT Documentation: ver 1.4.0 + network_editor Module — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,38 +42,38 @@

    Navigation

    -

    network_editor Module

    +

    network_editor Module

    The model can be edited by a list of EditRules to apply on nodes in a graph that represents the model during the model quantization. Each EditRule is a tuple of a filter and an action, where the action is applied to each node that the filter matches.

    -

    EditRule

    +

    EditRule

    -
    -class model_compression_toolkit.network_editor.EditRule(filter, action)
    +
    +class model_compression_toolkit.core.network_editor.EditRule(filter, action)

    A tuple of a node filter and an action. The filter matches nodes in the graph which represents the model, and the action is applied on these nodes during the quantization process.

    Examples

    -

    Create an EditRule to quantize all Conv2D wights using 9 bits:

    +

    Create an EditRule to quantize all Conv2D kernel attribute weights using 9 bits:

    >>> import model_compression_toolkit as mct
    +>>> from model_compression_toolkit.core.keras.constants import KERNEL
     >>> from tensorflow.keras.layers import Conv2D
    ->>> er_list = [EditRule(filter=mct.network_editor.NodeTypeFilter(Conv2D),
    ->>> action=mct.network_editor.ChangeCandidatesWeightsQuantConfigAttr(weights_n_bits=9))]
    +>>> er_list = [mct.network_editor.EditRule(filter=mct.network_editor.NodeTypeFilter(Conv2D), action=mct.network_editor.ChangeCandidatesWeightsQuantConfigAttr(attr_name=KERNEL, weights_n_bits=9))]
     
    -

    Then the rules list can be passed to keras_post_training_quantization() +

    Then the rules list can be passed to keras_post_training_quantization() to modify the network during the quantization process.

    Create new instance of EditRule(filter, action)

    -

    Filters

    +

    Filters

    -
    -class model_compression_toolkit.network_editor.NodeTypeFilter(node_type)
    +
    +class model_compression_toolkit.core.network_editor.NodeTypeFilter(node_type)

    Class NodeTypeFilter to check if a node is of a specific type.

    Init a NodeTypeFilter object.

    -
    Parameters
    +
    Parameters:

    node_type – Node type to check.

    @@ -82,12 +83,12 @@

    Filters

    -
    -class model_compression_toolkit.network_editor.NodeNameFilter(node_name)
    +
    +class model_compression_toolkit.core.network_editor.NodeNameFilter(node_name)

    Class NodeNameFilter to check if a node’s name has a specific value.

    Init a NodeNameFilter object.

    -
    Parameters
    +
    Parameters:

    node_name – Node name to check.

    @@ -97,12 +98,12 @@

    Filters
    -
    -class model_compression_toolkit.network_editor.NodeNameScopeFilter(node_name_scope)
    +
    +class model_compression_toolkit.core.network_editor.NodeNameScopeFilter(node_name_scope)

    Class NodeNameScopeFilter to check if a string is in a node’s name.

    Init a NodeNameScopeFilter object.

    -
    Parameters
    +
    Parameters:

    node_name_scope – String to check if exists in node name.

    @@ -110,14 +111,17 @@

    Filters -

    Actions

    +

    Actions

    -
    -class model_compression_toolkit.network_editor.ChangeFinalWeightsQuantConfigAttr(**kwargs)
    +
    +class model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantConfigAttr(attr_name=None, **kwargs)

    Change attributes in a layer’s final weights quantization config.

    -
    Parameters
    -

    kwargs – Dictionary of attr_name and attr_value to change layer’s final weights quantization config.

    +
    Parameters:
    +
      +
    • attr_name – The weights attribute’s name to set the weights quantization params function for.

    • +
    • kwargs – Dictionary of attr_name and attr_value to change layer’s final weights quantization config.

    • +
    @@ -126,12 +130,15 @@

    Actions
    -
    -class model_compression_toolkit.network_editor.ChangeCandidatesWeightsQuantConfigAttr(**kwargs)
    +
    +class model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantConfigAttr(attr_name=None, **kwargs)

    Change attributes in a layer’s weights quantization configuration candidates.

    -
    Parameters
    -

    kwargs – Dictionary of attr_name and attr_value to change layer’s weights quantization configuration candidates.

    +
    Parameters:
    +
      +
    • attr_name – The weights attribute’s name to set the weights quantization params function for.

    • +
    • kwargs – Dictionary of attr_name and attr_value to change layer’s weights quantization configuration candidates.

    • +
    @@ -140,11 +147,11 @@

    Actions
    -
    -class model_compression_toolkit.network_editor.ChangeFinalActivationQuantConfigAttr(**kwargs)
    +
    +class model_compression_toolkit.core.network_editor.ChangeFinalActivationQuantConfigAttr(**kwargs)

    Change attributes in a layer’s final activation quantization config.

    -
    Parameters
    +
    Parameters:

    kwargs – Dictionary of attr_name and attr_value to change layer’s final activation quantization config.

    @@ -154,11 +161,11 @@

    Actions
    -
    -class model_compression_toolkit.network_editor.ChangeCandidatesActivationQuantConfigAttr(**kwargs)
    +
    +class model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantConfigAttr(**kwargs)

    Change attributes in a layer’s activation quantization configuration candidates.

    -
    Parameters
    +
    Parameters:

    kwargs – Dictionary of attr_name and attr_value to change in the layer’s activation quantization configuration candidates.

    @@ -168,13 +175,14 @@

    Actions
    -
    -class model_compression_toolkit.network_editor.ChangeQuantizationParamFunction(activation_quantization_params_fn=None, weights_quantization_params_fn=None)
    +
    +class model_compression_toolkit.core.network_editor.ChangeQuantizationParamFunction(attr_name=None, activation_quantization_params_fn=None, weights_quantization_params_fn=None)

    Class ChangeQuantizationParamFunction to change a node’s weights/activations quantization params function.

    Init a ChangeQuantizationParamFunction object.

    -
    Parameters
    +
    Parameters:
      +
    • attr_name – The weights attribute’s name to set the weights quantization params function for (if setting weights params).

    • activation_quantization_params_fn – a params function for a node’s activations.

    • weights_quantization_params_fn – a params function for a node’s weights.

    @@ -186,13 +194,16 @@

    Actions
    -
    -class model_compression_toolkit.network_editor.ChangeFinalWeightsQuantizationMethod(weights_quantization_method=None)
    +
    +class model_compression_toolkit.core.network_editor.ChangeFinalWeightsQuantizationMethod(attr_name, weights_quantization_method=None)

    Class ChangeFinalWeightsQuantizationMethod to change a node’s weights/activations quantizer function.

    Init a ChangeFinalWeightsQuantizationMethod object.

    -
    Parameters
    -

    weights_quantization_method – a quantization method for a node’s weights.

    +
    Parameters:
    +
      +
    • attr_name – The weights attribute’s name to set the weights quantization method for.

    • +
    • weights_quantization_method – a quantization method for a node’s weights.

    • +
    @@ -201,13 +212,16 @@

    Actions
    -
    -class model_compression_toolkit.network_editor.ChangeCandidatesWeightsQuantizationMethod(weights_quantization_method=None)
    +
    +class model_compression_toolkit.core.network_editor.ChangeCandidatesWeightsQuantizationMethod(attr_name, weights_quantization_method=None)

    Class ChangeCandidatesWeightsQuantizationMethod to change a node’s weights quantizer function.

    Init a ChangeCandidatesWeightsQuantizationMethod object.

    -
    Parameters
    -

    weights_quantization_method – a quantization method for a node’s weights.

    +
    Parameters:
    +
      +
    • weights_quantization_method – a quantization method for a node’s weights.

    • +
    • attr_name – The weights attribute’s name to set the weights quantization params function for.

    • +
    @@ -216,12 +230,12 @@

    Actions
    -
    -class model_compression_toolkit.network_editor.ChangeCandidatesActivationQuantizationMethod(activation_quantization_method=None)
    +
    +class model_compression_toolkit.core.network_editor.ChangeCandidatesActivationQuantizationMethod(activation_quantization_method=None)

    Class ChangeQuantizationMethod to change a node’s activations quantizer function.

    Init a ChangeCandidatesActivationQuantizationMethod object.

    -
    Parameters
    +
    Parameters:

    activation_quantization_method – a quantization method for a node’s activations.

    @@ -258,7 +272,7 @@

    Quick search

    - +
    @@ -269,13 +283,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/modules/qat_config.html b/docs/api/api_docs/modules/qat_config.html new file mode 100644 index 000000000..e062c191c --- /dev/null +++ b/docs/api/api_docs/modules/qat_config.html @@ -0,0 +1,131 @@ + + + + + + + + + + qat_config Module — MCT Documentation: ver 1.11.0 + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +
    +

    qat_config Module

    +
    +

    TrainingMethod

    +

    Select a QAT training method:

    +
    +
    +class model_compression_toolkit.qat.TrainingMethod(value)
    +

    An enum for selecting a QAT training method

    +

    STE - Standard straight-through estimator. Includes PowerOfTwo, symmetric & uniform quantizers

    +

    DQA - DNN Quantization with Attention. Includes a smooth quantization introduced by the DQA method

    +

    LSQ - Learned Step size Quantization. Includes PowerOfTwo, symmetric & uniform quantizers: https://arxiv.org/pdf/1902.08153.pdf

    +
    + +
    +

    +
    +
    +
    +

    QATConfig

    +

    Class to configure the quantization process of the model when quantizing using Quantization-aware Training (QAT):

    +
    +
    +class model_compression_toolkit.qat.QATConfig(weight_training_method=TrainingMethod.STE, activation_training_method=TrainingMethod.STE, weight_quantizer_params_override=None, activation_quantizer_params_override=None)
    +

    QAT configuration class.

    +
    +
    Parameters:
    +
      +
    • weight_training_method (TrainingMethod) – Training method for weight quantizers

    • +
    • activation_training_method (TrainingMethod) – Training method for activation quantizers:

    • +
    • weight_quantizer_params_override – A dictionary of parameters to override in weight quantization quantizer instantiation. Defaults to None (no parameters)

    • +
    • activation_quantizer_params_override – A dictionary of parameters to override in activation quantization quantizer instantiation. Defaults to None (no parameters)
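    For example, a minimal sketch of a QAT configuration; the training methods shown are the documented defaults, passed explicitly for illustration:

    >>> import model_compression_toolkit as mct
    >>> qat_config = mct.qat.QATConfig(weight_training_method=mct.qat.TrainingMethod.STE,
    >>>                                activation_training_method=mct.qat.TrainingMethod.STE)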

    • +
    +
    +
    +
    + +
    +
    + + +
    +
    +
    +
    + +
    +
    + + + + \ No newline at end of file diff --git a/docs/api/api_docs/modules/quantization_config.html b/docs/api/api_docs/modules/quantization_config.html index 2bd31e1fe..2639a8d4a 100644 --- a/docs/api/api_docs/modules/quantization_config.html +++ b/docs/api/api_docs/modules/quantization_config.html @@ -2,12 +2,12 @@ - + - + - quantization_config Module — MCT Documentation: ver 1.4.0 + quantization_config Module — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,13 +42,13 @@

    Navigation

    -

    quantization_config Module

    +

    quantization_config Module

    -

    QuantizationErrorMethod

    +

    QuantizationErrorMethod

    Enum to select a method for quantization parameters’ selection:

    -
    -class model_compression_toolkit.QuantizationErrorMethod(value)
    +
    +class model_compression_toolkit.core.QuantizationErrorMethod(value)

    Method for quantization threshold selection:

    NOCLIPPING - Use min/max values as thresholds.

    MSE - Use min square error for minimizing quantization noise.

    @@ -61,20 +62,20 @@

    QuantizationErrorMethod

    -

    QuantizationConfig

    +

    QuantizationConfig

    Class to configure the quantization process of the model:

    -
    -class model_compression_toolkit.QuantizationConfig(activation_error_method=QuantizationErrorMethod.MSE, weights_error_method=QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=False, weights_bias_correction=True, weights_per_channel_threshold=True, input_scaling=False, softmax_shift=False, shift_negative_activation_correction=False, activation_channel_equalization=False, z_threshold=math.inf, min_threshold=MIN_THRESHOLD, l_p_value=2, shift_negative_ratio=0.05, shift_negative_threshold_recalculation=False)
    +
    +class model_compression_toolkit.core.QuantizationConfig(activation_error_method=QuantizationErrorMethod.MSE, weights_error_method=QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=False, weights_bias_correction=True, weights_second_moment_correction=False, input_scaling=False, softmax_shift=False, shift_negative_activation_correction=False, activation_channel_equalization=False, z_threshold=math.inf, min_threshold=MIN_THRESHOLD, l_p_value=2, linear_collapsing=True, residual_collapsing=True, shift_negative_ratio=0.05, shift_negative_threshold_recalculation=False, shift_negative_params_search=False)

    Class to wrap all the different parameters the library uses to quantize the input model.
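    For example, a minimal sketch of a custom quantization configuration (the chosen error methods and flag are illustrative):

    >>> import model_compression_toolkit as mct
    >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING,
    >>>                                  weights_error_method=mct.core.QuantizationErrorMethod.MSE,
    >>>                                  weights_bias_correction=True)
    >>> core_config = mct.core.CoreConfig(quantization_config=qc)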

    -
    Parameters
    +
    Parameters:
    @@ -130,7 +134,7 @@

    Quick search

    - +
    @@ -141,13 +145,13 @@

    Navigation

  • index
  • - + \ No newline at end of file diff --git a/docs/api/api_docs/modules/target_platform.html b/docs/api/api_docs/modules/target_platform.html index e3f421a7a..e57071ab8 100644 --- a/docs/api/api_docs/modules/target_platform.html +++ b/docs/api/api_docs/modules/target_platform.html @@ -2,12 +2,12 @@ - + - + - target_platform Module — MCT Documentation: ver 1.4.0 + target_platform Module — MCT Documentation: ver 1.11.0 @@ -15,6 +15,7 @@ + @@ -30,7 +31,7 @@

    Navigation

  • index
  • - + @@ -41,7 +42,7 @@

    Navigation

    -

    target_platform Module

    +

    target_platform Module

    MCT can be configured to quantize and optimize models for different hardware settings. For example, when using qnnpack backend for Pytorch model inference, Pytorch quantization configuration @@ -50,28 +51,30 @@

    Navigation

    Conv2D.

    This can be addressed in MCT by using the target_platform module, that can configure different parameters that are hardware-related, and the optimization process will use this to optimize the model accordingly. -Models for TFLite and qnnpack can be observed here, and can be used using get_target_platform_capabilities function.

    +Models for IMX500, TFLite and qnnpack can be observed here, and can be used using get_target_platform_capabilities function.


    Note

    -

    For now, fusing operators information from TargetPlatformModel -is ignored during the optimization process (fusing still occurs but using an existing mechanism). -Also, parts of OpQuantizationConfig is ignored (currently, -the quantizer type, number of bits, and quantization enable/disable information is used during the -optimization process).

    +

    For now, some fields of OpQuantizationConfig are ignored during the optimization process (currently, the quantizer type, number of bits, and quantization enable/disable information are in use).


    +

    The object MCT should get is called TargetPlatformCapabilities (or TPC for short). This diagram demonstrates the main components:

    +../../../images/tpc.jpg +

    Now, we will explain each component with examples.

    The first part is configuring the quantization method for both weights and activations of an operator. Several methods are available via the QuantizationMethod API:

    -

    QuantizationMethod

    +

    QuantizationMethod

    Select a method to use during quantization:

    @@ -79,9 +82,10 @@

    QuantizationMethod @@ -91,25 +95,23 @@

    QuantizationMethod -

    OpQuantizationConfig

    +

    OpQuantizationConfig

    -class model_compression_toolkit.target_platform.OpQuantizationConfig(activation_quantization_method, weights_quantization_method, activation_n_bits, weights_n_bits, weights_per_channel_threshold, enable_weights_quantization, enable_activation_quantization, quantization_preserving, fixed_scale, fixed_zero_point, weights_multiplier_nbits)
    +class model_compression_toolkit.target_platform.OpQuantizationConfig(default_weight_attr_config, attr_weights_configs_mapping, activation_quantization_method, activation_n_bits, enable_activation_quantization, quantization_preserving, fixed_scale, fixed_zero_point, simd_size)

    OpQuantizationConfig is a class to configure the quantization parameters of an operator.

    -
    Parameters
    +
    Parameters:
      +
    • default_weight_attr_config (AttributeQuantizationConfig) – A default attribute quantization configuration for the operation.

    • +
    • attr_weights_configs_mapping (dict) – A mapping between an op attribute name and its quantization configuration.

    • activation_quantization_method (QuantizationMethod) – Which method to use from QuantizationMethod for activation quantization.

    • -
    • weights_quantization_method (QuantizationMethod) – Which method to use from QuantizationMethod for weights quantization.

    • activation_n_bits (int) – Number of bits to quantize the activations.

    • -
    • weights_n_bits (int) – Number of bits to quantize the coefficients.

    • -
    • weights_per_channel_threshold (bool) – Whether to quantize the weights per-channel or not (per-tensor).

    • -
    • enable_weights_quantization (bool) – Whether to quantize the model weights or not.

    • enable_activation_quantization (bool) – Whether to quantize the model activations or not.

    • quantization_preserving (bool) – Whether quantization parameters should be the same for an operator’s input and output.

    • fixed_scale (float) – Scale to use for an operator quantization parameters.

    • fixed_zero_point (int) – Zero-point to use for an operator quantization parameters.

    • -
    • weights_multiplier_nbits (int) – Number of bits to use when quantizing in look-up-table.

    • +
    • simd_size (int) – Per op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.

    @@ -141,14 +143,14 @@

    OpQuantizationConfig

    -

    QuantizationConfigOptions

    +

    QuantizationConfigOptions

    class model_compression_toolkit.target_platform.QuantizationConfigOptions(quantization_config_list, base_config=None)

    Wrap a set of quantization configurations to consider during the quantization of an operator.

    -
    Parameters
    +
    Parameters:
    -

    LayerFilterParams

    +

    LayerFilterParams

    class model_compression_toolkit.target_platform.LayerFilterParams(layer, *conditions, **kwargs)

    Wrap a layer with filters to filter framework’s layers by their attributes.

    -
    Parameters
    +
    Parameters:
    • layer – Layer to match when filtering.

    • *conditions (AttributeFilter) – List of conditions to satisfy.

    • @@ -591,16 +598,17 @@

      LayerFilterParams -

      TargetPlatformCapabilities

      +

      TargetPlatformCapabilities

      -class model_compression_toolkit.target_platform.TargetPlatformCapabilities(tp_model, name='base')
      +class model_compression_toolkit.target_platform.TargetPlatformCapabilities(tp_model, name='base', version=None)

      Attach framework information to a modeled hardware.

      -
      Parameters
      +
      Parameters:
      • tp_model (TargetPlatformModel) – Modeled hardware to attach framework information to.

      • name (str) – Name of the TargetPlatformCapabilities.

      • +
      • version (str) – TPC version.

      @@ -611,87 +619,79 @@

      TargetPlatformCapabilities -

      TargetPlatformCapabilities Code Example

      -
      
      -import tensorflow as tf
      +

      TargetPlatformCapabilities Code Example

      +
      from typing import List, Tuple
       
      -from model_compression_toolkit.common.target_platform import TargetPlatformModel
      +import model_compression_toolkit as mct
      +from model_compression_toolkit.constants import FLOAT_BITWIDTH
      +from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
      +from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
      +    TargetPlatformModel
      +from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
      +    AttributeQuantizationConfig
       
      -if tf.__version__ < "2.6":
      -    from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, Dense, Conv2DTranspose, Reshape, ZeroPadding2D, Dropout, \
      -        MaxPooling2D, Activation, ReLU, Add, PReLU, Flatten, Cropping2D
      -else:
      -    from keras.layers import Conv2D, DepthwiseConv2D, Dense, Conv2DTranspose, Reshape, ZeroPadding2D, \
      -    Dropout, MaxPooling2D, Activation, ReLU, Add, PReLU, Flatten, Cropping2D
      +tp = mct.target_platform
       
      -from model_compression_toolkit.tpc_models.default_tp_model import get_default_tp_model
       
      -import model_compression_toolkit as mct
      -tpc = mct.target_platform
      +def get_tp_model() -> TargetPlatformModel:
      +    """
      +    A method that generates a default target platform model, with base 8-bit quantization configuration and 8, 4, 2
      +    bits configuration list for mixed-precision quantization.
      +    NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
      +    (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
      +    'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
       
      +    Returns: A TargetPlatformModel object.
       
      -def get_default_keras_tpc():
      -    default_tp_model = get_default_tp_model()
      -    return generate_keras_default_tpc(name='default_keras_tpc',
      -                                      tp_model=default_tp_model)
      +    """
      +    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
      +    return generate_tp_model(default_config=default_config,
      +                             base_config=base_config,
      +                             mixed_precision_cfg_list=mixed_precision_cfg_list,
      +                             name='imx500_tp_model')
       
       
      -def generate_keras_default_tpc(name: str, tp_model: TargetPlatformModel):
      -    """
      -    Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
      +def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
      +    """
      +    Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
      +    In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
      +    default configuration for mixed-precision quantization.
       
      -    Args:
      -        name: Name of the TargetPlatformCapabilities.
      -        tp_model: TargetPlatformModel object.
      +    Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
       
      -    Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
           """
       
      -    keras_tpc = tpc.TargetPlatformCapabilities(tp_model,
      -                                                name=name)
      -    with keras_tpc:
      -        tpc.OperationsSetToLayers("NoQuantization", [Reshape,
      -                                                     tf.reshape,
      -                                                     Flatten,
      -                                                     Cropping2D,
      -                                                     ZeroPadding2D,
      -                                                     Dropout,
      -                                                     MaxPooling2D,
      -                                                     tf.split,
      -                                                     tf.quantization.fake_quant_with_min_max_vars,
      -                                                     tf.math.argmax,
      -                                                     tf.shape,
      -                                                     tf.__operators__.getitem])
      -
      -        tpc.OperationsSetToLayers("Conv", [Conv2D,
      -                                           DepthwiseConv2D,
      -                                           tf.nn.conv2d,
      -                                           tf.nn.depthwise_conv2d])
      -
      -        tpc.OperationsSetToLayers("FullyConnected", [Dense])
      -
      -        tpc.OperationsSetToLayers("ConvTranspose", [Conv2DTranspose,
      -                                                    tf.nn.conv2d_transpose])
      -
      -        tpc.OperationsSetToLayers("AnyReLU", [tf.nn.relu,
      -                                              tf.nn.relu6,
      -                                              tpc.LayerFilterParams(ReLU, negative_slope=0.0),
      -                                              tpc.LayerFilterParams(Activation, activation="relu")])
      -
      -        tpc.OperationsSetToLayers("Add", [tf.add,
      -                                          Add])
      -
      -        tpc.OperationsSetToLayers("PReLU", [PReLU])
      -
      -        tpc.OperationsSetToLayers("Swish", [tf.nn.swish,
      -                                            tpc.LayerFilterParams(Activation, activation="swish")])
      -
      -        tpc.OperationsSetToLayers("Sigmoid", [tf.nn.sigmoid,
      -                                              tpc.LayerFilterParams(Activation, activation="sigmoid")])
      -
      -        tpc.OperationsSetToLayers("Tanh", [tf.nn.tanh,
      -                                           tpc.LayerFilterParams(Activation, activation="tanh")])
      -    return keras_tpc
      +    # TODO: currently, we don't want to quantize any attribute but the kernel by default,
      +    #  to preserve the current behavior of MCT, so quantization is disabled for all other attributes.
      +    #  Other quantization parameters are set to what we eventually want to quantize by default
      +    #  when we enable multi-attribute quantization - THIS NEEDS TO BE MODIFIED IN ALL TP MODELS!
      +
      +    # define a default quantization config for all non-specified weights attributes.
      +    default_weight_attr_config = AttributeQuantizationConfig(
      +        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
      +        weights_n_bits=8,
      +        weights_per_channel_threshold=False,
      +        enable_weights_quantization=False,  # TODO: change to True once multi-attribute quantization is implemented
      +        lut_values_bitwidth=None)
      +
      +    # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
      +    kernel_base_config = AttributeQuantizationConfig(
      +        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
      +        weights_n_bits=8,
      +        weights_per_channel_threshold=True,
      +        enable_weights_quantization=True,
      +        lut_values_bitwidth=None)
      +
      +    # define a quantization config to quantize the bias (for layers where there is a bias attribute).
      +    bias_config = AttributeQuantizationConfig(
      +        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
      +        weights_n_bits=FLOAT_BITWIDTH,
      +        weights_per_channel_threshold=False,
      +        enable_weights_quantization=False,
      +        lut_values_bitwidth=None)
      +
      +    # Create a quantization config.
      +    # A quantization configuration defines how an operator
       

diff --git a/docs/api/api_docs/modules/trainable_infrastructure.html b/docs/api/api_docs/modules/trainable_infrastructure.html
new file mode 100644
index 000000000..db6e20290
--- /dev/null
+++ b/docs/api/api_docs/modules/trainable_infrastructure.html
@@ -0,0 +1,216 @@

trainable_infrastructure Module — MCT Documentation: ver 1.11.0

trainable_infrastructure Module

The trainable infrastructure is a module containing quantization abstraction and quantizers for hardware-oriented model optimization tools.
It provides the required abstraction for trainable quantization methods such as quantization-aware training.
It utilizes the Inferable Quantizers Infrastructure provided by the MCT Quantizers package, which offers the required abstraction for emulating inference-time quantization.

When using a trainable quantizer, each layer with quantized weights is wrapped with a “Quantization Wrapper” object,
and each activation quantizer is stored in an “Activation Quantization Holder” object.
Both components are provided by the MCT Quantizers package.
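As a rough illustration of the wrapping described above (an editorial sketch, not part of this page), a Keras layer and an activation quantizer might be attached to the MCT Quantizers components as shown below; the exact class and argument names are assumptions about the mct_quantizers API and should be checked against the installed package.

    import tensorflow as tf
    from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantizationHolder

    # 'weight_quantizer' and 'activation_quantizer' are hypothetical placeholders for
    # quantizer instances (trainable or inferable) created elsewhere.
    wrapped_conv = KerasQuantizationWrapper(tf.keras.layers.Conv2D(16, 3),
                                            weights_quantizers={'kernel': weight_quantizer})

    # The activation quantizer lives in its own layer, inserted after the wrapped op.
    activation_holder = KerasActivationQuantizationHolder(activation_quantizer)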

The quantizers in this module are built upon the “Inferable Quantizer” abstraction (from MCT Quantizers),
and define the “Trainable Quantizer” framework,
which contains learnable quantization parameters that can be optimized during training.

Next, we explain how a trainable quantizer is built and used.
We start with the basic building block of a trainable quantizer, and then explain how to initialize it using a configuration object.

BaseKerasTrainableQuantizer

This class is a base class for trainable Keras quantizers. It validates the provided quantization config and defines an abstract function which any quantizer needs to implement.
It adds get_config and from_config functions to the base quantizer, to enable loading and saving of the Keras model.

class model_compression_toolkit.trainable_infrastructure.BaseKerasTrainableQuantizer(quantization_config)

    This class is a base quantizer which validates the provided quantization config and defines an abstract function which any quantizer needs to implement.
    This class adds get_config and from_config functions to the base quantizer, to enable loading and saving of the Keras model.

    Parameters:
        quantization_config – a quantizer config class that contains all the information about the quantizer configuration.

BasePytorchTrainableQuantizer

This class is a base class for trainable Pytorch quantizers. It validates the provided quantization config and defines an abstract function which any quantizer needs to implement.
It adds get_config and from_config functions to the base quantizer, to enable loading and saving of the model.

class model_compression_toolkit.trainable_infrastructure.BasePytorchTrainableQuantizer(quantization_config)

    This class is a base Pytorch quantizer which validates the provided quantization config and defines an
    abstract function which any quantizer needs to implement.

    Parameters:
        quantization_config – a quantizer config class that contains all the information about the quantizer configuration.

TrainableQuantizerWeightsConfig

This configuration object contains the necessary attributes for configuring a weights trainable quantizer.

class model_compression_toolkit.trainable_infrastructure.TrainableQuantizerWeightsConfig(weights_quantization_method, weights_n_bits, weights_quantization_params, enable_weights_quantization, weights_channels_axis, weights_per_channel_threshold, min_threshold, weights_quantization_candidates=None)

    Attributes for configuring a weights trainable quantizer.

    Parameters:
        • weights_quantization_method (QuantizationMethod) – Which method to use from QuantizationMethod for weights quantization.
        • weights_n_bits (int) – Number of bits to quantize the coefficients.
        • weights_quantization_params (Dict) – Dictionary that contains weights quantization params.
        • enable_weights_quantization (bool) – Whether to quantize the layer’s weights or not.
        • weights_channels_axis (int) – Axis along which to quantize the node’s kernel when quantizing per-channel.
        • weights_per_channel_threshold (bool) – Whether to quantize the weights per-channel or not (per-tensor).
        • min_threshold (float) – Minimum threshold to use during threshold selection.

For example, we can set a trainable weights quantizer with the following configuration:

from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD

TrainableQuantizerWeightsConfig(weights_quantization_method=QuantizationMethod.SYMMETRIC,
                                weights_n_bits=8,
                                weights_quantization_params={THRESHOLD: 2.0},
                                enable_weights_quantization=True,
                                weights_channels_axis=3,
                                weights_per_channel_threshold=True,
                                min_threshold=MIN_THRESHOLD)
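As a side note (added for orientation, not taken from this page): if weights_channels_axis=3 in the example above refers to a Keras Conv2D kernel, which is an assumption, then axis 3 is the kernel's output-channel axis, i.e. the axis along which per-channel thresholds would be computed.

    import tensorflow as tf

    conv = tf.keras.layers.Conv2D(filters=16, kernel_size=3)
    conv.build((None, 32, 32, 3))   # NHWC input with 3 channels
    print(conv.kernel.shape)        # (3, 3, 3, 16): axis 3 holds the 16 output channels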

TrainableQuantizerActivationConfig

This configuration object contains the necessary attributes for configuring an activation trainable quantizer.

class model_compression_toolkit.trainable_infrastructure.TrainableQuantizerActivationConfig(activation_quantization_method, activation_n_bits, activation_quantization_params, enable_activation_quantization, min_threshold, activation_quantization_candidates=None)

    Attributes for configuring an activation trainable quantizer.

    Parameters:
        • activation_quantization_method (QuantizationMethod) – Which method to use from QuantizationMethod for activation quantization.
        • activation_n_bits (int) – Number of bits to quantize the activations.
        • activation_quantization_params (Dict) – Dictionary that contains activation quantization params.
        • enable_activation_quantization (bool) – Whether to quantize the layer’s activations or not.
        • min_threshold (float) – Minimum threshold to use during threshold selection.

For example, we can set a trainable activation quantizer with the following configuration:

from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
from model_compression_toolkit.constants import THRESHOLD, MIN_THRESHOLD

TrainableQuantizerActivationConfig(activation_quantization_method=QuantizationMethod.UNIFORM,
                                   activation_n_bits=8,
                                   activation_quantization_params={THRESHOLD: 2.0},
                                   enable_activation_quantization=True,
                                   min_threshold=MIN_THRESHOLD)

diff --git a/docs/api/api_docs/notes/tpc_note.html b/docs/api/api_docs/notes/tpc_note.html
index 3d79c2ff9..79e6b0b0c 100644
--- a/docs/api/api_docs/notes/tpc_note.html
+++ b/docs/api/api_docs/notes/tpc_note.html
@@ -2,12 +2,12 @@
-<no title> — MCT Documentation: ver 1.4.0
+<no title> — MCT Documentation: ver 1.11.0
@@ -15,6 +15,7 @@
@@ -30,7 +31,7 @@

@@ -42,13 +43,11 @@
 Note

-For now, fusing operators information from TargetPlatformModel
-is ignored during the optimization process (fusing still occurs but using an existing mechanism).
-Also, parts of OpQuantizationConfig is ignored (currently,
-the quantizer type, number of bits, and quantization enable/disable information is used during the
-optimization process).
+For now, some fields of OpQuantizationConfig are ignored during
+the optimization process (currently, the quantizer type, number of bits, and quantization enable/disable information
+are in use).

diff --git a/docs/genindex.html b/docs/genindex.html
index 349de2131..9fb67ca01 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
(regenerated auto-generated index page; per-entry changes omitted)
diff --git a/docs/guidelines/quickstart_pytorch.html b/docs/guidelines/quickstart_pytorch.html
index ba3a9d422..ab11858b8 100644
--- a/docs/guidelines/quickstart_pytorch.html
+++ b/docs/guidelines/quickstart_pytorch.html
@@ -20,7 +20,7 @@