Merge branch 'main' of https://github.com/gouda-youichi/model_optimization into main
sony · Jan 29, 2025 · ffe0e6d · ffe0e6d
2 parents 66a3bd9 + 4c66805
commit ffe0e6d
Show file tree

Hide file tree

Showing 45 changed files with 1,255 additions and 412 deletions.
diff --git a/.github/workflows/run_keras_sony_custom_layers.yml b/.github/workflows/run_keras_sony_custom_layers.yml
diff --git a/.github/workflows/run_keras_tests.yml b/.github/workflows/run_keras_tests.yml
@@ -23,8 +23,9 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
-          pip install tensorflow==${{ inputs.tf-version }} sony-custom-layers pytest
-
+          pip install tensorflow==${{ inputs.tf-version }} sony-custom-layers 
+          pip install pytest pytest-mock
+          pip check
       - name: Run unittests
         run: |
           python -m unittest discover tests/keras_tests -v

diff --git a/.github/workflows/run_pytorch_tests.yml b/.github/workflows/run_pytorch_tests.yml
@@ -24,7 +24,8 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt          
           pip install torch==${{ inputs.torch-version }} torchvision onnx onnxruntime onnxruntime-extensions
-          pip install pytest
+          pip install pytest pytest-mock
+          pip check
       - name: Run unittests
         run: |
           python -m unittest discover tests/pytorch_tests -v

diff --git a/.github/workflows/run_tests_suite_coverage.yml b/.github/workflows/run_tests_suite_coverage.yml
@@ -30,43 +30,48 @@ jobs:
         with:
           python-version: '3.10'
 
-      - name: Set up Coverage
+      - name: Set up environment for common tests
         run: |
           python -m pip install --upgrade pip
-          pip install coverage
+          pip install -r requirements.txt coverage pytest pytest-mock
+
+      - name: Run common tests (unittest)
+        run: coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest discover tests/common_tests -v
+
+      - name: Run common tests (pytest)
+        run: coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" pytest tests_pytest/common
 
       - name: Set up TensorFlow environment
         run: |
           python -m venv tf_env
           source tf_env/bin/activate
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
-          pip install tensorflow==2.13.* coverage pytest
+          pip install -r requirements.txt tensorflow==2.13.* sony-custom-layers coverage pytest pytest-mock
 
-      - name: Run TensorFlow testsuite
+      - name: Run TensorFlow tests (unittest)
         run: |
           source tf_env/bin/activate 
-          coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest tests/test_suite.py -v
-      
-      - name: Run TensorFlow pytest
+          coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest discover tests/keras_tests -v
+
+      - name: Run TensorFlow tests (pytest)
         run: |
           source tf_env/bin/activate
           coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" pytest tests_pytest/keras
 
-      - name: Set up Pytorch environment
+      - name: Set up PyTorch environment
         run: |
           python -m venv torch_env
           source torch_env/bin/activate
           python -m pip install --upgrade pip
           pip install -r requirements.txt
-          pip install torch==2.0.* torchvision onnx onnxruntime onnxruntime-extensions coverage pytest
+          pip install torch==2.0.* torchvision onnx onnxruntime onnxruntime-extensions sony-custom-layers coverage pytest pytest-mock
 
-      - name: Run torch testsuite
+      - name: Run PyTorch tests (unittest)
         run: |
           source torch_env/bin/activate
-          coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest tests/test_suite.py -v
+          coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" unittest discover tests/pytorch_tests -v
 
-      - name: Run torch pytest
+      - name: Run PyTorch tests (pytest)
         run: |
           source torch_env/bin/activate
           coverage run --parallel-mode -m --omit "*__init__.py" --include "model_compression_toolkit/**/*.py" pytest tests_pytest/pytorch

diff --git a/.github/workflows/tests_common.yml b/.github/workflows/tests_common.yml
@@ -23,6 +23,13 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
+          pip install -r requirements.txt 
+          pip install pytest pytest-mock
+          pip check
+
       - name: Run unittests
         run: python -m unittest discover tests/common_tests -v
+
+      - name: Run pytest
+        run:  pytest tests_pytest/common
+
diff --git a/docsrc/images/tpc_diagram.png b/docsrc/images/tpc_diagram.png
diff --git a/docsrc/source/api/api_docs/classes/GradientPTQConfig.rst b/docsrc/source/api/api_docs/classes/GradientPTQConfig.rst
@@ -8,7 +8,7 @@ GradientPTQConfig Class
 =================================
 
 
-**The following API can be used to create a GradientPTQConfig instance which can be used for post training quantization using knowledge distillation from a teacher (float Keras model) to a student (the quantized Keras model)**
+**The following API can be used to create a GradientPTQConfig instance for post-training quantization using knowledge distillation from a teacher (the float model) to a student (the quantized model).**
 
 .. autoclass:: model_compression_toolkit.gptq.GradientPTQConfig
     :members:
@@ -30,3 +30,22 @@ RoundingType
 
 .. autoclass:: model_compression_toolkit.gptq.RoundingType
     :members:
+
+
+=====================================
+GradualActivationQuantizationConfig
+=====================================
+
+**The following API can be used to configure gradual activation quantization when using GPTQ.**
+
+.. autoclass:: model_compression_toolkit.gptq.GradualActivationQuantizationConfig
+    :members:
+
+
+=====================================
+QFractionLinearAnnealingConfig
+=====================================
+
+.. autoclass:: model_compression_toolkit.gptq.QFractionLinearAnnealingConfig
+    :members:
+
diff --git a/docsrc/source/api/api_docs/modules/target_platform_capabilities.rst b/docsrc/source/api/api_docs/modules/target_platform_capabilities.rst
@@ -27,7 +27,7 @@ Models for IMX500, TFLite and qnnpack can be observed `here <https://github.com/
 The object MCT should get called TargetPlatformCapabilities (or shortly TPC).
 This diagram demonstrates the main components:
 
-.. image:: ../../../../images/tpc.jpg
+.. image:: ../../../../images/tpc_diagram.png
   :scale: 80%
 
 Now, we will detail about the different components.

diff --git a/docsrc/source/diagrams/tpc.mermaid b/docsrc/source/diagrams/tpc.mermaid
@@ -0,0 +1,46 @@
+flowchart TB
+    subgraph TPC["Target Platform Capabilities"]
+        subgraph QCO["Quantization Config Options"]
+            Default["Default QCO (8-bit)"]
+            Mixed["Mixed Precision QCO (8, 4, 2 bits)"]
+            NoQuant["No Quantization QCO"]
+
+            subgraph OQC["Op Quantization Config"]
+                OQC1["8-bit"]
+                OQC2["4-bit"]
+                OQC3["2-bit"]
+                AQC["AttributeQuantizationConfig:<br>Kernel, Bias"]
+
+                OQC1 -->|contains| AQC
+                OQC2 -->|contains| AQC
+                OQC3 -->|contains| AQC
+            end
+
+            Mixed -->|contains| OQC1
+            Mixed -->|contains| OQC2
+            Mixed -->|contains| OQC3
+        end
+
+        subgraph OPS["Operators Sets"]
+            Conv["Conv, Conv Transpose,<br>Depthwise Conv"]
+            Act["ReLU, ReLU6,<br>Leaky ReLU, etc."]
+            NoQuantOps["Dropout, Flatten,<br>Reshape, etc."]
+        end
+
+        subgraph FP["Fusing Patterns"]
+            FP1["Conv + Activation"]
+        end
+
+        Mixed -->|attached to| Conv
+        Default -->|attached to| Act
+        NoQuant -->|attached to| NoQuantOps
+
+        FP1 -.-> Conv
+        FP1 -.-> Act
+    end
+
+    style TPC fill:#e6f3ff,stroke:#333
+    style QCO fill:#e6ffe6,stroke:#333
+    style OQC fill:#fff9e6,stroke:#333
+    style OPS fill:#ffe6e6,stroke:#333
+    style FP fill:#ffe6f0,stroke:#333
diff --git a/model_compression_toolkit/__init__.py b/model_compression_toolkit/__init__.py
@@ -27,4 +27,4 @@
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
-__version__ = "2.2.0"
+__version__ = "2.3.0"
diff --git a/model_compression_toolkit/core/common/graph/base_node.py b/model_compression_toolkit/core/common/graph/base_node.py
@@ -30,6 +30,9 @@
     FrameworkQuantizationCapabilities
 
 
+WeightAttrT = Union[str, int]
+
+
 class BaseNode:
     """
     Class to represent a node in a graph that represents the model.
@@ -40,7 +43,7 @@ def __init__(self,
                  framework_attr: Dict[str, Any],
                  input_shape: Tuple[Any],
                  output_shape: Tuple[Any],
-                 weights: Dict[Union[str, int], np.ndarray],
+                 weights: Dict[WeightAttrT, np.ndarray],
                  layer_class: type,
                  reuse: bool = False,
                  reuse_group: str = None,
@@ -189,7 +192,7 @@ def is_reused(self) -> bool:
         """
         return self.reuse or self.reuse_group is not None
 
-    def _get_weight_name(self, name: Union[str, int]) -> List[Union[str, int]]:
+    def _get_weight_name(self, name: WeightAttrT) -> List[WeightAttrT]:
         """
         Get weight names that match argument name (either string weights or integer for
         positional weights).
@@ -203,7 +206,7 @@ def _get_weight_name(self, name: Union[str, int]) -> List[Union[str, int]]:
         return [k for k in self.weights.keys()
                 if (isinstance(k, int) and name == k) or (isinstance(k, str) and name in k)]
 
-    def get_weights_by_keys(self, name: Union[str, int]) -> np.ndarray:
+    def get_weights_by_keys(self, name: WeightAttrT) -> np.ndarray:
         """
         Get a node's weight by its name.
         Args:

diff --git a/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py b/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py
@@ -53,9 +53,10 @@ def compute_graph_max_cut(memory_graph: MemoryGraph,
         try:
             schedule, max_cut_size, cuts = max_cut_astar.solve(estimate=estimate, iter_limit=astar_n_iter,
                                                                time_limit=None if it == 0 else 300)
-        except TimeoutError:
+        except TimeoutError:  # pragma: no cover
+            # TODO: add a test that covers the max-cut solver timeout path.
             if last_result[0] is None:
-                Logger.critical(f"Max-cut solver stopped on timeout in iteration {it} before finding a solution.")  # pragma: no cover
+                Logger.critical(f"Max-cut solver stopped on timeout in iteration {it} before finding a solution.")
             else:
                 Logger.warning(f"Max-cut solver stopped on timeout in iteration {it}.")
                 return last_result

diff --git a/model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py b/model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py
@@ -151,7 +151,8 @@ def solve(self, estimate: float, iter_limit: int = 500, time_limit: int = None)
         t1 = time()
         while expansion_count < iter_limit and len(open_list) > 0:
             if time_limit is not None and time() - t1 > time_limit:
-                raise TimeoutError
+                # TODO: add a test that triggers the time_limit timeout.
+                raise TimeoutError  # pragma: no cover
             # Choose next node to expand
             next_cut = self._get_cut_to_expand(open_list, costs, routes, estimate)
 

diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -189,9 +189,11 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource
 
         """
         act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
+        act_qcs = None if (RUTarget.ACTIVATION not in self.ru_targets_to_compute and RUTarget.TOTAL not in self.ru_targets_to_compute) else act_qcs
+        w_qcs = None if (RUTarget.WEIGHTS not in self.ru_targets_to_compute and RUTarget.TOTAL not in self.ru_targets_to_compute) else w_qcs
         ru = self.ru_helper.ru_calculator.compute_resource_utilization(
             target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
-            w_qcs=w_qcs)
+            w_qcs=w_qcs, ru_targets=self.ru_targets_to_compute)
         return ru
 
     def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):