diff --git a/tests/test_suite.py b/tests/test_suite.py index c8340eb73..7163da948 100644 --- a/tests/test_suite.py +++ b/tests/test_suite.py @@ -28,8 +28,6 @@ from tests.common_tests.test_doc_examples import TestCommonDocsExamples from tests.common_tests.test_tp_model import TargetPlatformModelingTest, OpsetTest, QCOptionsTest, FusingTest -if FOUND_ONNX: - from tests.pytorch_tests.function_tests.test_export_pytorch_fully_quantized_model import TestPyTorchFakeQuantExporter found_tf = importlib.util.find_spec("tensorflow") is not None found_pytorch = importlib.util.find_spec("torch") is not None and importlib.util.find_spec( @@ -147,8 +145,6 @@ suiteList.append(unittest.TestLoader().loadTestsFromTestCase(FeatureModelsTestRunner)) suiteList.append(unittest.TestLoader().loadTestsFromTestCase(FunctionTestRunner)) # Exporter test of pytorch must have ONNX installed - if FOUND_ONNX: - suiteList.append(unittest.TestLoader().loadTestsFromTestCase(TestPyTorchFakeQuantExporter)) # suiteList.append(unittest.TestLoader().loadTestsFromName('test_mobilenet_v2', ModelTest)) # suiteList.append(unittest.TestLoader().loadTestsFromName('test_mobilenet_v3', ModelTest)) # suiteList.append(unittest.TestLoader().loadTestsFromName('test_efficientnet_b0', ModelTest)) diff --git a/tutorials/notebooks/example_keras_imagenet.ipynb b/tutorials/notebooks/example_keras_imagenet.ipynb index d43ffa869..e0dd1be7f 100644 --- a/tutorials/notebooks/example_keras_imagenet.ipynb +++ b/tutorials/notebooks/example_keras_imagenet.ipynb @@ -1,485 +1,486 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "f8194007-6ea7-4e00-8931-a37ca2d0dd20", - "metadata": { - "id": "f8194007-6ea7-4e00-8931-a37ca2d0dd20" - }, - "source": [ - "# Post Training Quantization using the Model Compression Toolkit - A Quick-Start Guide" - ] - }, - { - "cell_type": "markdown", - "id": "9be59ea8-e208-4b64-aede-1dd6270b3540", - "metadata": { - "id": "9be59ea8-e208-4b64-aede-1dd6270b3540" - }, - "source": [ - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/example_keras_imagenet.ipynb)" - ] - }, - { - "cell_type": "markdown", - "id": "930e6d6d-4980-4d66-beed-9ff5a494acf9", - "metadata": { - "id": "930e6d6d-4980-4d66-beed-9ff5a494acf9" - }, - "source": [ - "## Overview" - ] - }, - { - "cell_type": "markdown", - "id": "699be4fd-d382-4eec-9d3f-e2e85cfb1762", - "metadata": { - "id": "699be4fd-d382-4eec-9d3f-e2e85cfb1762" - }, - "source": [ - "This tutorial shows how to quantize a pre-trained model using the Model Compression Toolkit (MCT). We will do so by giving an example of MCT's post-training quantization. As we will see, post-training quantization is a low complexity yet effective quantization method. In this example, we will quantize the model and evaluate the accuracy before and after quantization." - ] - }, - { - "cell_type": "markdown", - "id": "85199e25-c587-41b1-aaf5-e1d23ce97ca1", - "metadata": { - "id": "85199e25-c587-41b1-aaf5-e1d23ce97ca1" - }, - "source": [ - "## Summary" - ] - }, - { - "cell_type": "markdown", - "id": "9c0e9543-d356-412f-acf1-c2ecad553e06", - "metadata": { - "id": "9c0e9543-d356-412f-acf1-c2ecad553e06" - }, - "source": [ - "In this tutorial we will cover:\n", - "\n", - "1. Post-Training Quantization using MCT.\n", - "2. Loading and preprocessing ImageNet's validation dataset.\n", - "3. Loading and preprocessing an unlabeled representative dataset from the ImageNet trainset.\n", - "4. 
Accuracy evaluation of the floating-point and the quantized models." - ] - }, - { - "cell_type": "markdown", - "id": "04228b7c-00f1-4ded-bead-722e2a4e89a0", - "metadata": { - "tags": [], - "id": "04228b7c-00f1-4ded-bead-722e2a4e89a0" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "id": "2657cf1a-654d-45a6-b877-8bf42fc26d0d", - "metadata": { - "id": "2657cf1a-654d-45a6-b877-8bf42fc26d0d" - }, - "source": [ - "Install and import the relevant packages:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "324685b9-5dcc-4d22-80f4-dec9a93d3324", - "metadata": { - "tags": [], - "id": "324685b9-5dcc-4d22-80f4-dec9a93d3324" - }, - "outputs": [], - "source": [ - "!pip install -q tensorflow\n", - "!pip install -q model-compression-toolkit" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3f0acc8-281c-4bca-b0b9-3d7677105f19", - "metadata": { - "id": "b3f0acc8-281c-4bca-b0b9-3d7677105f19" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "import keras\n", - "import model_compression_toolkit as mct" - ] - }, - { - "cell_type": "markdown", - "id": "0c7fed0d-cfc8-41ee-adf1-22a98110397b", - "metadata": { - "id": "0c7fed0d-cfc8-41ee-adf1-22a98110397b" - }, - "source": [ - "## Dataset preparation" - ] - }, - { - "cell_type": "markdown", - "id": "719a8ca4-6c62-4624-a1ec-662c03dde902", - "metadata": { - "id": "719a8ca4-6c62-4624-a1ec-662c03dde902" - }, - "source": [ - "Assuming we've downloaded ImageNet's training dataset to a folder, let's set the folder path:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c9893131-0a95-4472-aa42-a73bd8d50576", - "metadata": { - "tags": [], - "id": "c9893131-0a95-4472-aa42-a73bd8d50576" - }, - "outputs": [], - "source": [ - "TRAIN_DATASET_FOLDER = '/path/to/imagenet/training/dir'" - ] - }, - { - "cell_type": "markdown", - "id": "028112db-3143-4fcb-96ae-e639e6476c31", - "metadata": { - "id": "028112db-3143-4fcb-96ae-e639e6476c31" - }, - "source": [ - "Now, let's create two functions. The first is for preprocessing the dataset and the second is for creating an unlabeled representative dataset for quantization calibration. 
We will use a batch size of 50:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ed56f505-97ff-4acb-8ad8-ef09c53e9d57", - "metadata": { - "id": "ed56f505-97ff-4acb-8ad8-ef09c53e9d57" - }, - "outputs": [], - "source": [ - "def imagenet_preprocess_input(images, labels):\n", - " return tf.keras.applications.mobilenet_v2.preprocess_input(images), labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0408f624-ab68-4989-95f8-f9d327882840", - "metadata": { - "id": "0408f624-ab68-4989-95f8-f9d327882840" - }, - "outputs": [], - "source": [ - "BATCH_SIZE = 50\n", - "n_iter=10\n", - "\n", - "def get_representative_dataset():\n", - " print('loading dataset, this may take few minutes ...')\n", - " dataset = tf.keras.utils.image_dataset_from_directory(\n", - " directory=TRAIN_DATASET_FOLDER,\n", - " batch_size=BATCH_SIZE,\n", - " image_size=[224, 224],\n", - " shuffle=True,\n", - " crop_to_aspect_ratio=True,\n", - " interpolation='bilinear')\n", - " dataset = dataset.map(lambda x, y: (imagenet_preprocess_input(x, y)))\n", - "\n", - " def representative_dataset():\n", - " for _ in range(n_iter):\n", - " yield dataset.take(1).get_single_element()[0].numpy()\n", - "\n", - " return representative_dataset\n", - "representative_dataset_gen = get_representative_dataset()" - ] - }, - { - "cell_type": "markdown", - "id": "4a1e9ba6-2954-4506-ad5c-0da273701ba5", - "metadata": { - "id": "4a1e9ba6-2954-4506-ad5c-0da273701ba5" - }, - "source": [ - "## Model post training quantization using MCT" - ] - }, - { - "cell_type": "markdown", - "id": "55edbb99-ab2f-4dde-aa74-4ddee61b2615", - "metadata": { - "id": "55edbb99-ab2f-4dde-aa74-4ddee61b2615" - }, - "source": [ - "Now for the main part.\n", - "\n", - "First, let's load a pre-trained mobilenet-v2 model from Keras, in 32-bits floating-point precision format:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "80cac59f-ec5e-41ca-b673-96220924a47c", - "metadata": { - "id": "80cac59f-ec5e-41ca-b673-96220924a47c" - }, - "outputs": [], - "source": [ - "from keras.applications.mobilenet_v2 import MobileNetV2\n", - "float_model = MobileNetV2()" - ] - }, - { - "cell_type": "markdown", - "id": "8a8b486a-ca39-45d9-8699-f7116b0414c9", - "metadata": { - "id": "8a8b486a-ca39-45d9-8699-f7116b0414c9" - }, - "source": [ - "Now, we apply post-training quantization on the model. In this example, we use the default 8-bits precision and 10 calibration iterations over the representative dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33f8373a-82a5-4b97-9a10-25ee2341d148", - "metadata": { - "id": "33f8373a-82a5-4b97-9a10-25ee2341d148" - }, - "outputs": [], - "source": [ - "quantized_model, quantization_info = mct.ptq.keras_post_training_quantization_experimental(float_model, representative_dataset_gen)" - ] - }, - { - "cell_type": "markdown", - "id": "7382ada6-d001-4564-907d-767fa4e9ec56", - "metadata": { - "id": "7382ada6-d001-4564-907d-767fa4e9ec56" - }, - "source": [ - "That's it! Our model is now quantized." - ] - }, - { - "cell_type": "markdown", - "id": "5a7a5150-3b92-49b5-abb2-06e6c5c91d6b", - "metadata": { - "id": "5a7a5150-3b92-49b5-abb2-06e6c5c91d6b" - }, - "source": [ - "## Models evaluation" - ] - }, - { - "cell_type": "markdown", - "id": "0ce4fc61-e13c-48be-9f7c-d441ad76a386", - "metadata": { - "id": "0ce4fc61-e13c-48be-9f7c-d441ad76a386" - }, - "source": [ - "In order to evaluate our models, we first need to load the validation dataset. 
As before, let's assume we downloaded the ImageNet validation dataset to a folder with the path below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eef7c875-c4fc-4819-97e5-721805cba546", - "metadata": { - "tags": [], - "id": "eef7c875-c4fc-4819-97e5-721805cba546" - }, - "outputs": [], - "source": [ - "TEST_DATASET_FOLDER = '/path/to/imagenet/test/dir'\n", - "def get_validation_dataset():\n", - " dataset = tf.keras.utils.image_dataset_from_directory(\n", - " directory=TEST_DATASET_FOLDER,\n", - " batch_size=BATCH_SIZE,\n", - " image_size=[224, 224],\n", - " shuffle=False,\n", - " crop_to_aspect_ratio=True,\n", - " interpolation='bilinear')\n", - " dataset = dataset.map(lambda x, y: (imagenet_preprocess_input(x, y)))\n", - " return dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd661b39-e033-4efc-a916-f97a1642cb36", - "metadata": { - "id": "fd661b39-e033-4efc-a916-f97a1642cb36" - }, - "outputs": [], - "source": [ - "evaluation_dataset = get_validation_dataset()" - ] - }, - { - "cell_type": "markdown", - "id": "9889d217-90a6-4615-8569-38dc9cdd5999", - "metadata": { - "id": "9889d217-90a6-4615-8569-38dc9cdd5999" - }, - "source": [ - "Let's start with the floating-point model evaluation.\n", - "\n", - "We need to compile the model before evaluation and set the loss and the evaluation metric:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d3a0ae9-beaa-4af8-8481-49d4917c2209", - "metadata": { - "id": "1d3a0ae9-beaa-4af8-8481-49d4917c2209" - }, - "outputs": [], - "source": [ - "float_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[\"accuracy\"])\n", - "results = float_model.evaluate(evaluation_dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "ead4a6f3-86a0-4e6c-8229-a2ff514f7b8c", - "metadata": { - "id": "ead4a6f3-86a0-4e6c-8229-a2ff514f7b8c" - }, - "source": [ - "Finally, let's evaluate the quantized model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bc377ee-39b4-4ced-95db-f7d51ab60848", - "metadata": { - "id": "1bc377ee-39b4-4ced-95db-f7d51ab60848" - }, - "outputs": [], - "source": [ - "quantized_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[\"accuracy\"])\n", - "results = quantized_model.evaluate(evaluation_dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "ebfbb4de-5b6e-4732-83d3-a21e96cdd866", - "metadata": { - "id": "ebfbb4de-5b6e-4732-83d3-a21e96cdd866" - }, - "source": [ - "You can see that we got a very small degradation with a compression rate of x4 !" 
- ] - }, - { - "cell_type": "markdown", - "source": [ - "Now, we can export the model to Keras and TFLite:" - ], - "metadata": { - "id": "6YjIdiRRjgkL" - }, - "id": "6YjIdiRRjgkL" - }, - { - "cell_type": "code", - "source": [ - "import tempfile\n", - "_, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model\n", - "mct.exporter.keras_export_model(model=quantized_model, save_model_path=tflite_file_path,\n", - " target_platform_capabilities=target_platform_cap,\n", - " serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE)\n", - "\n", - "_, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model\n", - "mct.exporter.keras_export_model(model=quantized_model, save_model_path=keras_file_path,\n", - " target_platform_capabilities=target_platform_cap,\n", - " serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5)" - ], - "metadata": { - "id": "z3CA16-ojoFL" - }, - "id": "z3CA16-ojoFL", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "id": "14877777", - "metadata": { - "id": "14877777" - }, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "id": "bb7e1572", - "metadata": { - "id": "bb7e1572" - }, - "source": [ - "In this tutorial, we demonstrated how to quantize a pre-trained model using MCT with a few lines of code. We saw that we can achieve an x4 compression ratio with minimal performance degradation.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "01c1645e-205c-4d9a-8af3-e497b3addec1", - "metadata": { - "id": "01c1645e-205c-4d9a-8af3-e497b3addec1" - }, - "source": [ - "\n", - "\n", - "Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "you may not use this file except in compliance with the License.\n", - "You may obtain a copy of the License at\n", - "\n", - " http://www.apache.org/licenses/LICENSE-2.0\n", - "\n", - "Unless required by applicable law or agreed to in writing, software\n", - "distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "See the License for the specific language governing permissions and\n", - "limitations under the License.\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - }, - "colab": { - "provenance": [] - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "id": "f8194007-6ea7-4e00-8931-a37ca2d0dd20", + "metadata": { + "id": "f8194007-6ea7-4e00-8931-a37ca2d0dd20" + }, + "source": [ + "# Post Training Quantization using the Model Compression Toolkit - A Quick-Start Guide" + ] + }, + { + "cell_type": "markdown", + "id": "9be59ea8-e208-4b64-aede-1dd6270b3540", + "metadata": { + "id": "9be59ea8-e208-4b64-aede-1dd6270b3540" + }, + "source": [ + "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/example_keras_imagenet.ipynb)" + ] + }, + { + "cell_type": "markdown", + "id": "930e6d6d-4980-4d66-beed-9ff5a494acf9", + "metadata": { + "id": 
"930e6d6d-4980-4d66-beed-9ff5a494acf9" + }, + "source": [ + "## Overview" + ] + }, + { + "cell_type": "markdown", + "id": "699be4fd-d382-4eec-9d3f-e2e85cfb1762", + "metadata": { + "id": "699be4fd-d382-4eec-9d3f-e2e85cfb1762" + }, + "source": [ + "This tutorial shows how to quantize a pre-trained model using the Model Compression Toolkit (MCT). We will do so by giving an example of MCT's post-training quantization. As we will see, post-training quantization is a low complexity yet effective quantization method. In this example, we will quantize the model and evaluate the accuracy before and after quantization." + ] + }, + { + "cell_type": "markdown", + "id": "85199e25-c587-41b1-aaf5-e1d23ce97ca1", + "metadata": { + "id": "85199e25-c587-41b1-aaf5-e1d23ce97ca1" + }, + "source": [ + "## Summary" + ] + }, + { + "cell_type": "markdown", + "id": "9c0e9543-d356-412f-acf1-c2ecad553e06", + "metadata": { + "id": "9c0e9543-d356-412f-acf1-c2ecad553e06" + }, + "source": [ + "In this tutorial we will cover:\n", + "\n", + "1. Post-Training Quantization using MCT.\n", + "2. Loading and preprocessing ImageNet's validation dataset.\n", + "3. Loading and preprocessing an unlabeled representative dataset from the ImageNet trainset.\n", + "4. Accuracy evaluation of the floating-point and the quantized models." + ] + }, + { + "cell_type": "markdown", + "id": "04228b7c-00f1-4ded-bead-722e2a4e89a0", + "metadata": { + "tags": [], + "id": "04228b7c-00f1-4ded-bead-722e2a4e89a0" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "id": "2657cf1a-654d-45a6-b877-8bf42fc26d0d", + "metadata": { + "id": "2657cf1a-654d-45a6-b877-8bf42fc26d0d" + }, + "source": [ + "Install and import the relevant packages:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "324685b9-5dcc-4d22-80f4-dec9a93d3324", + "metadata": { + "tags": [], + "id": "324685b9-5dcc-4d22-80f4-dec9a93d3324" + }, + "outputs": [], + "source": [ + "!pip install -q tensorflow\n", + "!pip install -q model-compression-toolkit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3f0acc8-281c-4bca-b0b9-3d7677105f19", + "metadata": { + "id": "b3f0acc8-281c-4bca-b0b9-3d7677105f19" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import keras\n", + "import model_compression_toolkit as mct" + ] + }, + { + "cell_type": "markdown", + "id": "0c7fed0d-cfc8-41ee-adf1-22a98110397b", + "metadata": { + "id": "0c7fed0d-cfc8-41ee-adf1-22a98110397b" + }, + "source": [ + "## Dataset preparation" + ] + }, + { + "cell_type": "markdown", + "id": "719a8ca4-6c62-4624-a1ec-662c03dde902", + "metadata": { + "id": "719a8ca4-6c62-4624-a1ec-662c03dde902" + }, + "source": [ + "Assuming we've downloaded ImageNet's training dataset to a folder, let's set the folder path:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9893131-0a95-4472-aa42-a73bd8d50576", + "metadata": { + "tags": [], + "id": "c9893131-0a95-4472-aa42-a73bd8d50576" + }, + "outputs": [], + "source": [ + "TRAIN_DATASET_FOLDER = '/path/to/imagenet/training/dir'" + ] + }, + { + "cell_type": "markdown", + "id": "028112db-3143-4fcb-96ae-e639e6476c31", + "metadata": { + "id": "028112db-3143-4fcb-96ae-e639e6476c31" + }, + "source": [ + "Now, let's create two functions. The first is for preprocessing the dataset and the second is for creating an unlabeled representative dataset for quantization calibration. 
We will use a batch size of 50:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed56f505-97ff-4acb-8ad8-ef09c53e9d57", + "metadata": { + "id": "ed56f505-97ff-4acb-8ad8-ef09c53e9d57" + }, + "outputs": [], + "source": [ + "def imagenet_preprocess_input(images, labels):\n", + " return tf.keras.applications.mobilenet_v2.preprocess_input(images), labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0408f624-ab68-4989-95f8-f9d327882840", + "metadata": { + "id": "0408f624-ab68-4989-95f8-f9d327882840" + }, + "outputs": [], + "source": [ + "BATCH_SIZE = 50\n", + "n_iter=10\n", + "\n", + "def get_representative_dataset():\n", + " print('loading dataset, this may take a few minutes ...')\n", + " dataset = tf.keras.utils.image_dataset_from_directory(\n", + " directory=TRAIN_DATASET_FOLDER,\n", + " batch_size=BATCH_SIZE,\n", + " image_size=[224, 224],\n", + " shuffle=True,\n", + " crop_to_aspect_ratio=True,\n", + " interpolation='bilinear')\n", + " dataset = dataset.map(lambda x, y: (imagenet_preprocess_input(x, y)))\n", + "\n", + " def representative_dataset():\n", + " for _ in range(n_iter):\n", + " yield dataset.take(1).get_single_element()[0].numpy()\n", + "\n", + " return representative_dataset\n", + "representative_dataset_gen = get_representative_dataset()" + ] + }, + { + "cell_type": "markdown", + "id": "4a1e9ba6-2954-4506-ad5c-0da273701ba5", + "metadata": { + "id": "4a1e9ba6-2954-4506-ad5c-0da273701ba5" + }, + "source": [ + "## Model post-training quantization using MCT" + ] + }, + { + "cell_type": "markdown", + "id": "55edbb99-ab2f-4dde-aa74-4ddee61b2615", + "metadata": { + "id": "55edbb99-ab2f-4dde-aa74-4ddee61b2615" + }, + "source": [ + "Now for the main part.\n", + "\n", + "First, let's load a pre-trained MobileNetV2 model from Keras, in 32-bit floating-point precision:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80cac59f-ec5e-41ca-b673-96220924a47c", + "metadata": { + "id": "80cac59f-ec5e-41ca-b673-96220924a47c" + }, + "outputs": [], + "source": [ + "from keras.applications.mobilenet_v2 import MobileNetV2\n", + "float_model = MobileNetV2()" + ] + }, + { + "cell_type": "markdown", + "id": "8a8b486a-ca39-45d9-8699-f7116b0414c9", + "metadata": { + "id": "8a8b486a-ca39-45d9-8699-f7116b0414c9" + }, + "source": [ + "Now, we apply post-training quantization to the model. In this example, we use the default 8-bit precision and 10 calibration iterations over the representative dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33f8373a-82a5-4b97-9a10-25ee2341d148", + "metadata": { + "id": "33f8373a-82a5-4b97-9a10-25ee2341d148" + }, + "outputs": [], + "source": [ + "quantized_model, quantization_info = mct.ptq.keras_post_training_quantization_experimental(float_model, representative_dataset_gen)" + ] + }, + { + "cell_type": "markdown", + "id": "7382ada6-d001-4564-907d-767fa4e9ec56", + "metadata": { + "id": "7382ada6-d001-4564-907d-767fa4e9ec56" + }, + "source": [ + "That's it! Our model is now quantized." + ] + }, + { + "cell_type": "markdown", + "id": "5a7a5150-3b92-49b5-abb2-06e6c5c91d6b", + "metadata": { + "id": "5a7a5150-3b92-49b5-abb2-06e6c5c91d6b" + }, + "source": [ + "## Model evaluation" + ] + }, + { + "cell_type": "markdown", + "id": "0ce4fc61-e13c-48be-9f7c-d441ad76a386", + "metadata": { + "id": "0ce4fc61-e13c-48be-9f7c-d441ad76a386" + }, + "source": [ + "In order to evaluate our models, we first need to load the validation dataset. 
As before, let's assume we downloaded the ImageNet validation dataset to a folder with the path below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eef7c875-c4fc-4819-97e5-721805cba546", + "metadata": { + "tags": [], + "id": "eef7c875-c4fc-4819-97e5-721805cba546" + }, + "outputs": [], + "source": [ + "TEST_DATASET_FOLDER = '/path/to/imagenet/test/dir'\n", + "def get_validation_dataset():\n", + " dataset = tf.keras.utils.image_dataset_from_directory(\n", + " directory=TEST_DATASET_FOLDER,\n", + " batch_size=BATCH_SIZE,\n", + " image_size=[224, 224],\n", + " shuffle=False,\n", + " crop_to_aspect_ratio=True,\n", + " interpolation='bilinear')\n", + " dataset = dataset.map(lambda x, y: (imagenet_preprocess_input(x, y)))\n", + " return dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd661b39-e033-4efc-a916-f97a1642cb36", + "metadata": { + "id": "fd661b39-e033-4efc-a916-f97a1642cb36" + }, + "outputs": [], + "source": [ + "evaluation_dataset = get_validation_dataset()" + ] + }, + { + "cell_type": "markdown", + "id": "9889d217-90a6-4615-8569-38dc9cdd5999", + "metadata": { + "id": "9889d217-90a6-4615-8569-38dc9cdd5999" + }, + "source": [ + "Let's start with the floating-point model evaluation.\n", + "\n", + "We need to compile the model before evaluation and set the loss and the evaluation metric:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d3a0ae9-beaa-4af8-8481-49d4917c2209", + "metadata": { + "id": "1d3a0ae9-beaa-4af8-8481-49d4917c2209" + }, + "outputs": [], + "source": [ + "float_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[\"accuracy\"])\n", + "results = float_model.evaluate(evaluation_dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "ead4a6f3-86a0-4e6c-8229-a2ff514f7b8c", + "metadata": { + "id": "ead4a6f3-86a0-4e6c-8229-a2ff514f7b8c" + }, + "source": [ + "Finally, let's evaluate the quantized model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bc377ee-39b4-4ced-95db-f7d51ab60848", + "metadata": { + "id": "1bc377ee-39b4-4ced-95db-f7d51ab60848" + }, + "outputs": [], + "source": [ + "quantized_model.compile(loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[\"accuracy\"])\n", + "results = quantized_model.evaluate(evaluation_dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "ebfbb4de-5b6e-4732-83d3-a21e96cdd866", + "metadata": { + "id": "ebfbb4de-5b6e-4732-83d3-a21e96cdd866" + }, + "source": [ + "You can see that we got a very small degradation with a compression ratio of x4!" 
+ ] + }, + { + "cell_type": "markdown", + "source": [ + "Now, we can export the model to Keras and TFLite:" + ], + "metadata": { + "id": "6YjIdiRRjgkL" + }, + "id": "6YjIdiRRjgkL" + }, + { + "cell_type": "code", + "source": [ + "import tempfile\n", + "_, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model\n", + "mct.exporter.keras_export_model(model=quantized_model, \n", + " save_model_path=tflite_file_path,\n", + " serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE,\n", + " quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)\n", + "\n", + "_, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model\n", + "mct.exporter.keras_export_model(model=quantized_model, \n", + " save_model_path=keras_file_path,\n", + " serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5)" + ], + "metadata": { + "id": "z3CA16-ojoFL" + }, + "id": "z3CA16-ojoFL", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "14877777", + "metadata": { + "id": "14877777" + }, + "source": [ + "## Conclusion" + ] + }, + { + "cell_type": "markdown", + "id": "bb7e1572", + "metadata": { + "id": "bb7e1572" + }, + "source": [ + "In this tutorial, we demonstrated how to quantize a pre-trained model using MCT with a few lines of code. We saw that we can achieve an x4 compression ratio with minimal performance degradation.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "01c1645e-205c-4d9a-8af3-e497b3addec1", + "metadata": { + "id": "01c1645e-205c-4d9a-8af3-e497b3addec1" + }, + "source": [ + "\n", + "\n", + "Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "you may not use this file except in compliance with the License.\n", + "You may obtain a copy of the License at\n", + "\n", + " http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software\n", + "distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "See the License for the specific language governing permissions and\n", + "limitations under the License.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/notebooks/example_keras_mobilenet.py b/tutorials/notebooks/example_keras_mobilenet.py index 146625789..8a68b5ee4 100644 --- a/tutorials/notebooks/example_keras_mobilenet.py +++ b/tutorials/notebooks/example_keras_mobilenet.py @@ -110,11 +110,11 @@ def representative_data_gen() -> list: # Export quantized model to TFLite and Keras. 
# For more details please see: https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/exporter/README.md _, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=tflite_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) _, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=keras_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=keras_file_path) diff --git a/tutorials/notebooks/example_keras_mobilenet_gptq.py b/tutorials/notebooks/example_keras_mobilenet_gptq.py index c739f1265..e4d2e7b5a 100644 --- a/tutorials/notebooks/example_keras_mobilenet_gptq.py +++ b/tutorials/notebooks/example_keras_mobilenet_gptq.py @@ -119,11 +119,11 @@ def representative_data_gen() -> list: # Export quantized model to TFLite and Keras. # For more details please see: https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/exporter/README.md _, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=tflite_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) _, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=keras_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=keras_file_path) diff --git a/tutorials/notebooks/example_keras_mobilenet_gptq_mixed_precision.py b/tutorials/notebooks/example_keras_mobilenet_gptq_mixed_precision.py index c9bdbb74b..24400a034 100644 --- a/tutorials/notebooks/example_keras_mobilenet_gptq_mixed_precision.py +++ b/tutorials/notebooks/example_keras_mobilenet_gptq_mixed_precision.py @@ -152,11 +152,11 @@ def representative_data_gen() -> list: # Export quantized model to TFLite and Keras. 
# For more details please see: https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/exporter/README.md _, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=tflite_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) _, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=keras_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5) \ No newline at end of file + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=keras_file_path) diff --git a/tutorials/notebooks/example_keras_mobilenet_mixed_precision.py b/tutorials/notebooks/example_keras_mobilenet_mixed_precision.py index 9e162798e..81e9e80d5 100644 --- a/tutorials/notebooks/example_keras_mobilenet_mixed_precision.py +++ b/tutorials/notebooks/example_keras_mobilenet_mixed_precision.py @@ -142,11 +142,11 @@ def representative_data_gen() -> list: # Export quantized model to TFLite and Keras. # For more details please see: https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/exporter/README.md _, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=tflite_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) _, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=keras_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5) \ No newline at end of file + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=keras_file_path) diff --git a/tutorials/notebooks/example_keras_mobilenet_mixed_precision_lut.py b/tutorials/notebooks/example_keras_mobilenet_mixed_precision_lut.py index d6184c91f..831fa01fd 100644 --- a/tutorials/notebooks/example_keras_mobilenet_mixed_precision_lut.py +++ b/tutorials/notebooks/example_keras_mobilenet_mixed_precision_lut.py @@ -146,11 +146,11 @@ def representative_data_gen() -> list: # Export quantized model to TFLite and Keras. 
# For more details please see: https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/exporter/README.md _, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=tflite_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) _, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=keras_file_path, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5) \ No newline at end of file + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=keras_file_path) \ No newline at end of file diff --git a/tutorials/notebooks/example_keras_qat.ipynb b/tutorials/notebooks/example_keras_qat.ipynb index d35ca6e05..63a7e9574 100644 --- a/tutorials/notebooks/example_keras_qat.ipynb +++ b/tutorials/notebooks/example_keras_qat.ipynb @@ -276,14 +276,14 @@ "import tempfile\n", "# Export quantized model to TFLite\n", "_, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model\n", - "mct.exporter.keras_export_model(model=quantized_model, save_model_path=tflite_file_path,\n", - " target_platform_capabilities=target_platform_cap,\n", - " serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE)\n", + "mct.exporter.keras_export_model(model=quantized_model, \n", + " save_model_path=tflite_file_path,\n", + " serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE,\n", + " quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT)\n", "# Export quantized model to Keras\n", "_, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model\n", - "mct.exporter.keras_export_model(model=quantized_model, save_model_path=keras_file_path,\n", - " target_platform_capabilities=target_platform_cap,\n", - " serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5)" + "mct.exporter.keras_export_model(model=quantized_model, \n", + " save_model_path=keras_file_path)" ] }, { diff --git a/tutorials/notebooks/example_keras_qat.py b/tutorials/notebooks/example_keras_qat.py index 305562d98..abc066636 100644 --- a/tutorials/notebooks/example_keras_qat.py +++ b/tutorials/notebooks/example_keras_qat.py @@ -219,14 +219,14 @@ def argument_handler(): # Export quantized model to TFLite and Keras. 
# For more details please see: https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/exporter/README.md _, tflite_file_path = tempfile.mkstemp('.tflite') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=tflite_file_path, - target_platform_capabilities=get_tpc(), - serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=tflite_file_path, + serialization_format=mct.exporter.KerasExportSerializationFormat.TFLITE, + quantization_format=mct.exporter.QuantizationFormat.FAKELY_QUANT) print(f"Quantized model was exported to TFLite here: {tflite_file_path}") # Export quantized model to Keras _, keras_file_path = tempfile.mkstemp('.h5') # Path of exported model - mct.exporter.keras_export_model(model=quantized_model, save_model_path=keras_file_path, - target_platform_capabilities=get_tpc(), - serialization_format=mct.exporter.KerasExportSerializationFormat.KERAS_H5) + mct.exporter.keras_export_model(model=quantized_model, + save_model_path=keras_file_path) print(f"Quantized model was exported to Keras here: {keras_file_path}") \ No newline at end of file diff --git a/tutorials/notebooks/example_pytorch_mobilenet_mixed_precision.py b/tutorials/notebooks/example_pytorch_mobilenet_mixed_precision.py index fc5327a22..4d437d763 100644 --- a/tutorials/notebooks/example_pytorch_mobilenet_mixed_precision.py +++ b/tutorials/notebooks/example_pytorch_mobilenet_mixed_precision.py @@ -135,6 +135,6 @@ def representative_data_gen() -> list: # Export quantized model to ONNX _, onnx_file_path = tempfile.mkstemp('.onnx') # Path of exported model - mct.exporter.pytorch_export_model(model=quantized_model, save_model_path=onnx_file_path, - repr_dataset=representative_data_gen, target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX) \ No newline at end of file + mct.exporter.pytorch_export_model(model=quantized_model, + save_model_path=onnx_file_path, + repr_dataset=representative_data_gen) \ No newline at end of file diff --git a/tutorials/notebooks/example_pytorch_mobilenet_v2.py b/tutorials/notebooks/example_pytorch_mobilenet_v2.py index 0b50a4d90..0ad5506ff 100644 --- a/tutorials/notebooks/example_pytorch_mobilenet_v2.py +++ b/tutorials/notebooks/example_pytorch_mobilenet_v2.py @@ -123,6 +123,6 @@ def representative_data_gen() -> list: # Export quantized model to ONNX _, onnx_file_path = tempfile.mkstemp('.onnx') # Path of exported model - mct.exporter.pytorch_export_model(model=quantized_model, save_model_path=onnx_file_path, - repr_dataset=representative_data_gen, target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX) \ No newline at end of file + mct.exporter.pytorch_export_model(model=quantized_model, + save_model_path=onnx_file_path, + repr_dataset=representative_data_gen) \ No newline at end of file diff --git a/tutorials/notebooks/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb b/tutorials/notebooks/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb old mode 100755 new mode 100644 index e4f8d6b7c..880bce82a --- a/tutorials/notebooks/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb +++ b/tutorials/notebooks/example_pytorch_mobilenetv2_cifar100_mixed_precision.ipynb @@ -595,9 +595,9 @@ "# Export quantized model to ONNX\n", "import tempfile\n", "_, onnx_file_path = 
tempfile.mkstemp('.onnx') # Path of exported model\n", - "mct.exporter.pytorch_export_model(model=quantized_model, save_model_path=onnx_file_path,\n", - " repr_dataset=representative_data_gen, target_platform_capabilities=target_platform_cap,\n", - " serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX)" + "mct.exporter.pytorch_export_model(model=quantized_model, \n", + " save_model_path=onnx_file_path,\n", + " repr_dataset=representative_data_gen)" ] }, { diff --git a/tutorials/notebooks/example_pytorch_mobilenetv2_cifar100_mixed_precision.py b/tutorials/notebooks/example_pytorch_mobilenetv2_cifar100_mixed_precision.py index 8eba05b6f..e90dccd86 100644 --- a/tutorials/notebooks/example_pytorch_mobilenetv2_cifar100_mixed_precision.py +++ b/tutorials/notebooks/example_pytorch_mobilenetv2_cifar100_mixed_precision.py @@ -257,7 +257,6 @@ def representative_data_gen() -> list: # Export quantized model to ONNX _, onnx_file_path = tempfile.mkstemp('.onnx') # Path of exported model - mct.exporter.pytorch_export_model(model=quantized_model, save_model_path=onnx_file_path, - repr_dataset=representative_data_gen, - target_platform_capabilities=target_platform_cap, - serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX) + mct.exporter.pytorch_export_model(model=quantized_model, + save_model_path=onnx_file_path, + repr_dataset=representative_data_gen) diff --git a/tutorials/notebooks/example_pytorch_qat.py b/tutorials/notebooks/example_pytorch_qat.py index 30b720dfb..e7d3a007a 100644 --- a/tutorials/notebooks/example_pytorch_qat.py +++ b/tutorials/notebooks/example_pytorch_qat.py @@ -229,6 +229,6 @@ def representative_data_gen(): # Export quantized model to ONNX _, onnx_file_path = tempfile.mkstemp('.onnx') # Path of exported model - mct.exporter.pytorch_export_model(model=quantized_model, save_model_path=onnx_file_path, - repr_dataset=representative_data_gen, target_platform_capabilities=get_tpc(), - serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX) \ No newline at end of file + mct.exporter.pytorch_export_model(model=quantized_model, + save_model_path=onnx_file_path, + repr_dataset=representative_data_gen) \ No newline at end of file diff --git a/tutorials/notebooks/example_pytorch_quantization_mnist.ipynb b/tutorials/notebooks/example_pytorch_quantization_mnist.ipynb old mode 100755 new mode 100644 index 8dd325c49..a98934a52 --- a/tutorials/notebooks/example_pytorch_quantization_mnist.ipynb +++ b/tutorials/notebooks/example_pytorch_quantization_mnist.ipynb @@ -1,460 +1,460 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "7cf96fb4", - "metadata": { - "id": "7cf96fb4" - }, - "source": [ - "# Quantization using the Model Compression Toolkit - example in Pytorch" - ] - }, - { - "cell_type": "markdown", - "id": "59ed8f02", - "metadata": { - "id": "59ed8f02" - }, - "source": [ - "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/example_pytorch_quantization_mnist.ipynb)" - ] - }, - { - "cell_type": "markdown", - "id": "822944a1", - "metadata": { - "id": "822944a1" - }, - "source": [ - "## Overview" - ] - }, - { - "cell_type": "markdown", - "id": "743dbc3d", - "metadata": { - "id": "743dbc3d" - }, - "source": [ - "This quick start guide covers how to use the Model Compression Toolkit (MCT) for quantizing a PyTorch model. We will do so by giving an end-to-end example, training a model from scratch on MNIST data, then quantizing it using the MCT." 
- ] - }, - { - "cell_type": "markdown", - "id": "59e2eeae", - "metadata": { - "id": "59e2eeae" - }, - "source": [ - "## Summary" - ] - }, - { - "cell_type": "markdown", - "id": "1daf577a", - "metadata": { - "id": "1daf577a" - }, - "source": [ - "In this tutorial we will cover:\n", - "1. Training a Pytorch model from scratch on MNIST.\n", - "2. Quantizing the model in a hardware-friendly manner (symmetric quantization, power-of-2 thresholds) using 8-bit activations and weights.\n", - "3. We will examine the output quantized model, evaluate it and compare its performance to the original model.\n", - "4. We will approximate the compression gains due to quantization." - ] - }, - { - "cell_type": "markdown", - "id": "8b3396bf", - "metadata": { - "id": "8b3396bf" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "id": "5e7690ef", - "metadata": { - "id": "5e7690ef" - }, - "source": [ - "Install the relevant packages:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "89e0bb04", - "metadata": { - "id": "89e0bb04" - }, - "outputs": [], - "source": [ - "! pip install -q model-compression-toolkit\n", - "! pip install -q torch\n", - "! pip install -q torchvision" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a82928d0", - "metadata": { - "id": "a82928d0" - }, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "import argparse\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.nn.functional as F\n", - "import torch.optim as optim\n", - "from torchvision import datasets, transforms\n", - "from torch.optim.lr_scheduler import StepLR\n", - "import model_compression_toolkit as mct" - ] - }, - { - "cell_type": "markdown", - "id": "1653425b", - "metadata": { - "id": "1653425b" - }, - "source": [ - "## Train a Pytorch classifier model on MNIST" - ] - }, - { - "cell_type": "markdown", - "id": "02312089", - "metadata": { - "id": "02312089" - }, - "source": [ - "Let us define the network and some helper functions to train and evaluate the model. 
These are taken from the official Pytorch examples https://github.com/pytorch/examples/blob/main/mnist/main.py" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16f9bcc0", - "metadata": { - "id": "16f9bcc0" - }, - "outputs": [], - "source": [ - "class Net(nn.Module):\n", - " def __init__(self):\n", - " super(Net, self).__init__()\n", - " self.conv1 = nn.Conv2d(1, 32, 3, 1)\n", - " self.conv2 = nn.Conv2d(32, 64, 3, 1)\n", - " self.dropout1 = nn.Dropout(0.25)\n", - " self.dropout2 = nn.Dropout(0.5)\n", - " self.fc1 = nn.Linear(9216, 128)\n", - " self.fc2 = nn.Linear(128, 10)\n", - "\n", - " def forward(self, x):\n", - " x = self.conv1(x)\n", - " x = F.relu(x)\n", - " x = self.conv2(x)\n", - " x = F.relu(x)\n", - " x = F.max_pool2d(x, 2)\n", - " x = self.dropout1(x)\n", - " x = torch.flatten(x, 1)\n", - " x = self.fc1(x)\n", - " x = F.relu(x)\n", - " x = self.dropout2(x)\n", - " x = self.fc2(x)\n", - " output = F.log_softmax(x, dim=1)\n", - " return output\n", - "\n", - "\n", - "def train(model, device, train_loader, optimizer, epoch):\n", - " model.train()\n", - " for batch_idx, (data, target) in enumerate(train_loader):\n", - " data, target = data.to(device), target.to(device)\n", - " optimizer.zero_grad()\n", - " output = model(data)\n", - " loss = F.nll_loss(output, target)\n", - " loss.backward()\n", - " optimizer.step()\n", - " if batch_idx % 100 == 0:\n", - " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", - " epoch, batch_idx * len(data), len(train_loader.dataset),\n", - " 100. * batch_idx / len(train_loader), loss.item()))\n", - "\n", - "\n", - "def test(model, device, test_loader):\n", - " model.eval()\n", - " test_loss = 0\n", - " correct = 0\n", - " with torch.no_grad():\n", - " for data, target in test_loader:\n", - " data, target = data.to(device), target.to(device)\n", - " output = model(data)\n", - " test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss\n", - " pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability\n", - " correct += pred.eq(target.view_as(pred)).sum().item()\n", - "\n", - " test_loss /= len(test_loader.dataset)\n", - "\n", - " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", - " test_loss, correct, len(test_loader.dataset),\n", - " 100. * correct / len(test_loader.dataset)))\n", - "\n", - "batch_size = 64\n", - "test_batch_size = 1000\n", - "random_seed = 1\n", - "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "torch.backends.cudnn.enabled = False\n", - "torch.manual_seed(random_seed)\n", - "dataset_folder = '/datasets/mnist/images'\n", - "epochs = 2\n", - "gamma = 0.7\n", - "lr = 1.0" - ] - }, - { - "cell_type": "markdown", - "id": "c24d3c5a", - "metadata": { - "id": "c24d3c5a" - }, - "source": [ - "Let us define the dataset loaders, and optimizer and train the model for 2 epochs." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c615a27e", - "metadata": { - "id": "c615a27e" - }, - "outputs": [], - "source": [ - "transform=transforms.Compose([\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.1307,), (0.3081,))\n", - " ])\n", - "dataset1 = datasets.MNIST(dataset_folder, train=True, download=True,\n", - " transform=transform)\n", - "dataset2 = datasets.MNIST(dataset_folder, train=False,\n", - " transform=transform)\n", - "train_loader = torch.utils.data.DataLoader(dataset1, num_workers=1, pin_memory=True, batch_size=batch_size, shuffle=True)\n", - "test_loader = torch.utils.data.DataLoader(dataset2, num_workers=1, pin_memory=True, batch_size=test_batch_size, shuffle=False)\n", - "\n", - "model = Net().to(device)\n", - "optimizer = optim.Adadelta(model.parameters(), lr=lr)\n", - "\n", - "scheduler = StepLR(optimizer, step_size=1, gamma=gamma)\n", - "for epoch in range(1, epochs + 1):\n", - " train(model, device, train_loader, optimizer, epoch)\n", - " test(model, device, test_loader)\n", - " scheduler.step()" - ] - }, - { - "cell_type": "markdown", - "id": "69366614", - "metadata": { - "id": "69366614" - }, - "source": [ - "After training for 2 epochs we get an accuracy of 98.5%. Not bad." - ] - }, - { - "cell_type": "markdown", - "id": "e9cd25a7", - "metadata": { - "id": "e9cd25a7" - }, - "source": [ - "## Hardware-friendly quantization using MCT" - ] - }, - { - "cell_type": "markdown", - "id": "c0321aad", - "metadata": { - "id": "c0321aad" - }, - "source": [ - "Now we would like to quantize this model using the Model Compression Toolkit.\n", - "To do so, we need to define a representative dataset, which is a generator that returns a list of images:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "618975be", - "metadata": { - "id": "618975be" - }, - "outputs": [], - "source": [ - "image_data_loader = iter(train_loader)\n", - "n_iter=10\n", - "\n", - "def representative_data_gen() -> list:\n", - " for _ in range(n_iter):\n", - " yield [next(image_data_loader)[0]]" - ] - }, - { - "cell_type": "markdown", - "id": "d0a92bee", - "metadata": { - "id": "d0a92bee" - }, - "source": [ - "Now for the fireworks. Lets run hardware-friendly post training quantization on the model. The output of MCT is a simulated quantized model in the input model's framework. That is, the model adds fake-quantization nodes after layers that need to be quantized. The output model's size on the disk does'nt change, but all the quantization parameters are available for deployment on target hardware." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "63f695dd", - "metadata": { - "id": "63f695dd" - }, - "outputs": [], - "source": [ - "target_platform_cap = mct.get_target_platform_capabilities('pytorch', 'default')\n", - "quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization_experimental(\n", - " in_module=model,\n", - " representative_data_gen=representative_data_gen,\n", - " target_platform_capabilities=target_platform_cap\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "d3521637", - "metadata": { - "id": "d3521637" - }, - "source": [ - "The MCT prints the approximated model size after real quantization and the compression ratio. In this example, we used the default setting of MCT and compressed the model from 32 bits to 8 bits, hence the compression ratio is x4. 
Using the simulated quantized model, we can evaluate its performance using the original model's testing environment, and compare its performance to the original model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f5fa4a2", - "metadata": { - "id": "4f5fa4a2" - }, - "outputs": [], - "source": [ - "print(quantization_info)\n", - "test(quantized_model, device, test_loader)" - ] - }, - { - "cell_type": "markdown", - "id": "fd09fa27", - "metadata": { - "id": "fd09fa27" - }, - "source": [ - "In this scenario, we see that the compression almost didn't affect the accuracy of the model." - ] - }, - { - "cell_type": "markdown", - "source": [ - "Now, we can export the quantized model to ONNX:" - ], - "metadata": { - "id": "9nQBVWFhbKXV" - }, - "id": "9nQBVWFhbKXV" - }, - { - "cell_type": "code", - "source": [ - "# Export quantized model to ONNX\n", - "import tempfile\n", - "_, onnx_file_path = tempfile.mkstemp('.onnx') # Path of exported model\n", - "mct.exporter.pytorch_export_model(model=quantized_model, save_model_path=onnx_file_path,\n", - " repr_dataset=representative_data_gen, target_platform_capabilities=target_platform_cap,\n", - " serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX)" - ], - "metadata": { - "id": "oXMn6bFjbQad" - }, - "id": "oXMn6bFjbQad", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "id": "14877777", - "metadata": { - "id": "14877777" - }, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "id": "bb7e1572", - "metadata": { - "id": "bb7e1572" - }, - "source": [ - "In this tutorial, we demonstrated how to quantize a classification model for MNIST in a hardware-friendly manner using MCT. We saw that we can achieve an x4 compression ratio with minimal performance degradation.\n", - "\n", - "The advantage of quantizing in a hardware-friendly manner is that this model can run more efficiently in the sense of run time, power consumption, and memory on designated hardware.\n", - "\n", - "This is a very simple model and a very simple task. MCT can demonstrate competitive results on a wide variety of tasks and network architectures. Check out the paper for more details: https://arxiv.org/abs/2109.09113\n", - "\n", - "\n", - "Copyright 2022 Sony Semiconductor Israel, Inc. 
All rights reserved.\n",
-    "\n",
-    "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
-    "you may not use this file except in compliance with the License.\n",
-    "You may obtain a copy of the License at\n",
-    "\n",
-    "    http://www.apache.org/licenses/LICENSE-2.0\n",
-    "\n",
-    "Unless required by applicable law or agreed to in writing, software\n",
-    "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
-    "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
-    "See the License for the specific language governing permissions and\n",
-    "limitations under the License.\n"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  },
-  "colab": {
-   "provenance": []
-  }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7cf96fb4",
+   "metadata": {
+    "id": "7cf96fb4"
+   },
+   "source": [
+    "# Quantization using the Model Compression Toolkit - example in PyTorch"
+   ]
  },
- "nbformat": 4,
- "nbformat_minor": 5
-} \ No newline at end of file
+  {
+   "cell_type": "markdown",
+   "id": "59ed8f02",
+   "metadata": {
+    "id": "59ed8f02"
+   },
+   "source": [
+    "[Run this tutorial in Google Colab](https://colab.research.google.com/github/sony/model_optimization/blob/main/tutorials/notebooks/example_pytorch_quantization_mnist.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "822944a1",
+   "metadata": {
+    "id": "822944a1"
+   },
+   "source": [
+    "## Overview"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "743dbc3d",
+   "metadata": {
+    "id": "743dbc3d"
+   },
+   "source": [
+    "This quick start guide covers how to use the Model Compression Toolkit (MCT) to quantize a PyTorch model. We will do so with an end-to-end example: training a model from scratch on MNIST and then quantizing it with MCT."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "59e2eeae",
+   "metadata": {
+    "id": "59e2eeae"
+   },
+   "source": [
+    "## Summary"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1daf577a",
+   "metadata": {
+    "id": "1daf577a"
+   },
+   "source": [
+    "In this tutorial we will cover:\n",
+    "1. Training a PyTorch model from scratch on MNIST.\n",
+    "2. Quantizing the model in a hardware-friendly manner (symmetric quantization, power-of-2 thresholds) using 8-bit activations and weights.\n",
+    "3. Examining the output quantized model, evaluating it, and comparing its performance to the original model.\n",
+    "4. Approximating the compression gains due to quantization."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8b3396bf",
+   "metadata": {
+    "id": "8b3396bf"
+   },
+   "source": [
+    "## Setup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5e7690ef",
+   "metadata": {
+    "id": "5e7690ef"
+   },
+   "source": [
+    "Install the relevant packages:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89e0bb04",
+   "metadata": {
+    "id": "89e0bb04"
+   },
+   "outputs": [],
+   "source": [
+    "! pip install -q model-compression-toolkit\n",
+    "! pip install -q torch\n",
+    "! pip install -q torchvision"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a82928d0",
+   "metadata": {
+    "id": "a82928d0"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "from torchvision import datasets, transforms\n",
+    "from torch.optim.lr_scheduler import StepLR\n",
+    "import model_compression_toolkit as mct"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1653425b",
+   "metadata": {
+    "id": "1653425b"
+   },
+   "source": [
+    "## Train a PyTorch classifier model on MNIST"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "02312089",
+   "metadata": {
+    "id": "02312089"
+   },
+   "source": [
+    "Let us define the network and some helper functions to train and evaluate the model. These are taken from the official PyTorch examples: https://github.com/pytorch/examples/blob/main/mnist/main.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16f9bcc0",
+   "metadata": {
+    "id": "16f9bcc0"
+   },
+   "outputs": [],
+   "source": [
+    "class Net(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(Net, self).__init__()\n",
+    "        self.conv1 = nn.Conv2d(1, 32, 3, 1)\n",
+    "        self.conv2 = nn.Conv2d(32, 64, 3, 1)\n",
+    "        self.dropout1 = nn.Dropout(0.25)\n",
+    "        self.dropout2 = nn.Dropout(0.5)\n",
+    "        self.fc1 = nn.Linear(9216, 128)\n",
+    "        self.fc2 = nn.Linear(128, 10)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = self.conv1(x)\n",
+    "        x = F.relu(x)\n",
+    "        x = self.conv2(x)\n",
+    "        x = F.relu(x)\n",
+    "        x = F.max_pool2d(x, 2)\n",
+    "        x = self.dropout1(x)\n",
+    "        x = torch.flatten(x, 1)\n",
+    "        x = self.fc1(x)\n",
+    "        x = F.relu(x)\n",
+    "        x = self.dropout2(x)\n",
+    "        x = self.fc2(x)\n",
+    "        output = F.log_softmax(x, dim=1)\n",
+    "        return output\n",
+    "\n",
+    "\n",
+    "def train(model, device, train_loader, optimizer, epoch):\n",
+    "    model.train()\n",
+    "    for batch_idx, (data, target) in enumerate(train_loader):\n",
+    "        data, target = data.to(device), target.to(device)\n",
+    "        optimizer.zero_grad()\n",
+    "        output = model(data)\n",
+    "        loss = F.nll_loss(output, target)\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        if batch_idx % 100 == 0:\n",
+    "            print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
+    "                epoch, batch_idx * len(data), len(train_loader.dataset),\n",
+    "                100. * batch_idx / len(train_loader), loss.item()))\n",
+    "\n",
+    "\n",
+    "def test(model, device, test_loader):\n",
+    "    model.eval()\n",
+    "    test_loss = 0\n",
+    "    correct = 0\n",
+    "    with torch.no_grad():\n",
+    "        for data, target in test_loader:\n",
+    "            data, target = data.to(device), target.to(device)\n",
+    "            output = model(data)\n",
+    "            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss\n",
+    "            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability\n",
+    "            correct += pred.eq(target.view_as(pred)).sum().item()\n",
+    "\n",
+    "    test_loss /= len(test_loader.dataset)\n",
+    "\n",
+    "    print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
+    "        test_loss, correct, len(test_loader.dataset),\n",
+    "        100. * correct / len(test_loader.dataset)))\n",
+    "\n",
+    "batch_size = 64\n",
+    "test_batch_size = 1000\n",
+    "random_seed = 1\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "torch.backends.cudnn.enabled = False\n",
+    "torch.manual_seed(random_seed)\n",
+    "dataset_folder = '/datasets/mnist/images'\n",
+    "epochs = 2\n",
+    "gamma = 0.7\n",
+    "lr = 1.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c24d3c5a",
+   "metadata": {
+    "id": "c24d3c5a"
+   },
+   "source": [
+    "Let us define the dataset loaders and the optimizer, and train the model for 2 epochs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c615a27e",
+   "metadata": {
+    "id": "c615a27e"
+   },
+   "outputs": [],
+   "source": [
+    "transform = transforms.Compose([\n",
+    "    transforms.ToTensor(),\n",
+    "    transforms.Normalize((0.1307,), (0.3081,))\n",
+    "    ])\n",
+    "dataset1 = datasets.MNIST(dataset_folder, train=True, download=True,\n",
+    "                          transform=transform)\n",
+    "dataset2 = datasets.MNIST(dataset_folder, train=False,\n",
+    "                          transform=transform)\n",
+    "train_loader = torch.utils.data.DataLoader(dataset1, num_workers=1, pin_memory=True, batch_size=batch_size, shuffle=True)\n",
+    "test_loader = torch.utils.data.DataLoader(dataset2, num_workers=1, pin_memory=True, batch_size=test_batch_size, shuffle=False)\n",
+    "\n",
+    "model = Net().to(device)\n",
+    "optimizer = optim.Adadelta(model.parameters(), lr=lr)\n",
+    "\n",
+    "scheduler = StepLR(optimizer, step_size=1, gamma=gamma)\n",
+    "for epoch in range(1, epochs + 1):\n",
+    "    train(model, device, train_loader, optimizer, epoch)\n",
+    "    test(model, device, test_loader)\n",
+    "    scheduler.step()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69366614",
+   "metadata": {
+    "id": "69366614"
+   },
+   "source": [
+    "After training for 2 epochs, we get an accuracy of 98.5%. Not bad."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e9cd25a7",
+   "metadata": {
+    "id": "e9cd25a7"
+   },
+   "source": [
+    "## Hardware-friendly quantization using MCT"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c0321aad",
+   "metadata": {
+    "id": "c0321aad"
+   },
+   "source": [
+    "Now we would like to quantize this model using the Model Compression Toolkit.\n",
+    "To do so, we need to define a representative dataset, which is a generator that returns a list of images:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "618975be",
+   "metadata": {
+    "id": "618975be"
+   },
+   "outputs": [],
+   "source": [
+    "image_data_loader = iter(train_loader)\n",
+    "n_iter = 10\n",
+    "\n",
+    "def representative_data_gen() -> list:\n",
+    "    for _ in range(n_iter):\n",
+    "        yield [next(image_data_loader)[0]]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d0a92bee",
+   "metadata": {
+    "id": "d0a92bee"
+   },
+   "source": [
+    "Now for the fireworks. Let's run hardware-friendly post-training quantization on the model. The output of MCT is a simulated quantized model in the input model's framework. That is, MCT inserts fake-quantization nodes after the layers that need to be quantized. The output model's size on disk doesn't change, but all the quantization parameters are available for deployment on target hardware."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "63f695dd",
+   "metadata": {
+    "id": "63f695dd"
+   },
+   "outputs": [],
+   "source": [
+    "target_platform_cap = mct.get_target_platform_capabilities('pytorch', 'default')\n",
+    "quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization_experimental(\n",
+    "    in_module=model,\n",
+    "    representative_data_gen=representative_data_gen,\n",
+    "    target_platform_capabilities=target_platform_cap\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d3521637",
+   "metadata": {
+    "id": "d3521637"
+   },
+   "source": [
+    "MCT prints the approximate model size after real quantization and the resulting compression ratio. In this example, we used MCT's default settings and compressed the model from 32 bits to 8 bits, hence the compression ratio is x4. Using the simulated quantized model, we can evaluate its performance in the original model's testing environment and compare it to the original model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f5fa4a2",
+   "metadata": {
+    "id": "4f5fa4a2"
+   },
+   "outputs": [],
+   "source": [
+    "print(quantization_info)\n",
+    "test(quantized_model, device, test_loader)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fd09fa27",
+   "metadata": {
+    "id": "fd09fa27"
+   },
+   "source": [
+    "In this scenario, we see that the compression hardly affected the model's accuracy."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Now, we can export the quantized model to ONNX:"
+   ],
+   "metadata": {
+    "id": "9nQBVWFhbKXV"
+   },
+   "id": "9nQBVWFhbKXV"
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "# Export quantized model to ONNX\n",
+    "import tempfile\n",
+    "_, onnx_file_path = tempfile.mkstemp('.onnx')  # Path of exported model\n",
+    "mct.exporter.pytorch_export_model(model=quantized_model,\n",
+    "                                  save_model_path=onnx_file_path,\n",
+    "                                  repr_dataset=representative_data_gen)"
+   ],
+   "metadata": {
+    "id": "oXMn6bFjbQad"
+   },
+   "id": "oXMn6bFjbQad",
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14877777",
+   "metadata": {
+    "id": "14877777"
+   },
+   "source": [
+    "## Conclusion"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb7e1572",
+   "metadata": {
+    "id": "bb7e1572"
+   },
+   "source": [
+    "In this tutorial, we demonstrated how to quantize a classification model for MNIST in a hardware-friendly manner using MCT. We saw that we can achieve an x4 compression ratio with minimal performance degradation.\n",
+    "\n",
+    "The advantage of quantizing in a hardware-friendly manner is that the model can run more efficiently, in terms of run time, power consumption, and memory, on designated hardware.\n",
+    "\n",
+    "This is a very simple model and a very simple task. MCT achieves competitive results on a wide variety of tasks and network architectures. Check out the paper for more details: https://arxiv.org/abs/2109.09113\n",
+    "\n",
+    "\n",
+    "Copyright 2022 Sony Semiconductor Israel, Inc.
All rights reserved.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "you may not use this file except in compliance with the License.\n", + "You may obtain a copy of the License at\n", + "\n", + " http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software\n", + "distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "See the License for the specific language governing permissions and\n", + "limitations under the License.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/quick_start/pytorch_fw/quant.py b/tutorials/quick_start/pytorch_fw/quant.py index a1914c0f0..0d91d8b30 100644 --- a/tutorials/quick_start/pytorch_fw/quant.py +++ b/tutorials/quick_start/pytorch_fw/quant.py @@ -137,10 +137,9 @@ def quantize(model: nn.Module, # Export quantized model to ONNX if args.get('export_model',False): _, onnx_file_path = tempfile.mkstemp('.onnx') # Path of exported model - mct.exporter.pytorch_export_model(model=quantized_model, save_model_path=onnx_file_path, - repr_dataset=representative_data_gen, target_platform_capabilities=tpc, - serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX, - use_onnx_custom_quantizer_ops=True) + mct.exporter.pytorch_export_model(model=quantized_model, + save_model_path=onnx_file_path, + repr_dataset=representative_data_gen) return quantized_model, QuantInfo(user_info=quantization_info, tpc_info=tpc.get_info(), quantization_workflow=workflow, mp_weights_compression=mp_wcr) \ No newline at end of file
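The notebook above quantizes "in a hardware-friendly manner (symmetric quantization, power-of-2 thresholds)". For intuition about what that phrase means, here is a minimal, self-contained sketch of an 8-bit symmetric fake-quantizer with a power-of-2 threshold. It illustrates the general technique only and is not MCT's internal implementation; the function name and the per-tensor (rather than per-channel) threshold are assumptions made for the example.

```python
import torch

def po2_symmetric_quantize(w: torch.Tensor, n_bits: int = 8) -> torch.Tensor:
    """Illustrative symmetric fake-quantizer with a power-of-2 threshold.

    Not MCT code; a sketch of the general idea the notebook describes.
    """
    # Smallest power of 2 that covers the tensor's dynamic range.
    threshold = 2.0 ** torch.ceil(torch.log2(w.abs().max()))
    # Step size of a signed n-bit integer grid spanning [-threshold, threshold).
    scale = threshold / (2 ** (n_bits - 1))
    q_min, q_max = -2 ** (n_bits - 1), 2 ** (n_bits - 1) - 1
    # Round to the grid and clamp; output stays float ("fake quantization").
    return torch.clamp(torch.round(w / scale), q_min, q_max) * scale

# Example: quantize a random weight tensor and inspect the rounding error.
w = torch.randn(64, 32)
w_q = po2_symmetric_quantize(w)
print('max abs error:', (w - w_q).abs().max().item())
```

Power-of-2 thresholds are hardware-friendly because the rescaling can be implemented as a bit shift instead of a multiplication.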
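The x4 compression ratio quoted in the notebook follows directly from storing 8-bit instead of 32-bit weights. A back-of-envelope check, assuming the `model` variable from the notebook is in scope; `estimate_size_bytes` is a hypothetical helper written for this sketch, not an MCT API:

```python
import torch

def estimate_size_bytes(model: torch.nn.Module, bits_per_param: int) -> int:
    # Hypothetical helper: parameter count times bytes per parameter.
    # Ignores activations, thresholds, and other small quantization metadata.
    n_params = sum(p.numel() for p in model.parameters())
    return n_params * bits_per_param // 8

fp32_size = estimate_size_bytes(model, 32)
int8_size = estimate_size_bytes(model, 8)
print(f'fp32: {fp32_size / 1e6:.2f} MB, int8: {int8_size / 1e6:.2f} MB, '
      f'ratio: x{fp32_size / int8_size:.0f}')  # expect x4
```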
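After the `mct.exporter.pytorch_export_model` call, it can be useful to sanity-check the exported file. A minimal sketch, assuming `onnxruntime` is installed (`pip install onnxruntime`) and that the exported graph uses only operators plain onnxruntime supports, which may not hold for every exporter configuration; `onnx_file_path` is the path created in the export cell above:

```python
import numpy as np
import onnxruntime as ort  # assumed installed; not a notebook dependency

# Load the exported model and run one dummy MNIST-shaped batch through it.
sess = ort.InferenceSession(onnx_file_path, providers=['CPUExecutionProvider'])
input_name = sess.get_inputs()[0].name
dummy = np.random.randn(1, 1, 28, 28).astype(np.float32)
outputs = sess.run(None, {input_name: dummy})
print(outputs[0].shape)  # expected: (1, 10) log-probabilities
```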