Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Attach TPC to Framework as part of MCT main track #1308

Merged
merged 44 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
0af006d
update all TP models to the new schema and removed pytorch fw tpcs
ofirgo Dec 25, 2024
ca7c104
Refactor Target Platform Capabilities - Phase 4
Dec 25, 2024
20fa29f
Add pydantic to requirements file
Dec 25, 2024
33dbe52
Modify pytorch API with attach2fw.
ofirgo Dec 26, 2024
53fe1d8
All Pytorch function tests pass except a few special cases to solve later.
ofirgo Dec 26, 2024
80f0d56
Aligned all pytorch APIs, all other pytorch tests pass successfully
ofirgo Dec 26, 2024
b3d4125
merge main into branch
ofirgo Dec 26, 2024
266ebb3
merge tpc phase 4 branch changes
ofirgo Dec 26, 2024
7760dc2
Modify Keras APIs and TPCs and removed fw TPCs
ofirgo Dec 26, 2024
349e68c
finished tpc modifications to start running keras tests
ofirgo Dec 26, 2024
1439e8a
Move existing tp models to tests
ofirgo Dec 30, 2024
f129054
All keras feature tests pass except some related to const and 16 bit
ofirgo Dec 30, 2024
edcaecd
most of the function tests pass except a weird issue and 16-bit tests
ofirgo Dec 30, 2024
c68f477
All other keras tests pass except one in exporter that requires speci…
ofirgo Dec 30, 2024
d7a4526
All torch tests pass except tp model tests
ofirgo Dec 30, 2024
530e40f
All keras tests pass except tp model tests and some weird maybe loca…
ofirgo Dec 30, 2024
b5285f4
Fix some keras tests
ofirgo Dec 31, 2024
93224a1
- extend opset fw mapping
ofirgo Dec 31, 2024
6e13aa0
verify default tpc version
ofirgo Dec 31, 2024
417a3d3
fix pytorch custom layer opset
ofirgo Dec 31, 2024
1c909b9
merge main
ofirgo Jan 1, 2025
b89ca55
minor fixes
ofirgo Jan 1, 2025
3dfad7f
align tflite and qnnpack
ofirgo Jan 1, 2025
e6f2ec5
fixes to built-in opset
ofirgo Jan 1, 2025
905b190
fix tests
ofirgo Jan 1, 2025
d0e6fd6
fix import
ofirgo Jan 1, 2025
42be1cb
fix keras input layer import in tests
ofirgo Jan 1, 2025
683d95a
fix tests
ofirgo Jan 1, 2025
8b4503e
fix some tests
ofirgo Jan 2, 2025
290ef92
remove tpc from graph object saved in the output pytorch model
ofirgo Jan 2, 2025
1d6f511
Merge branch 'remove-tpc-from-torch-model' into use-attach2fw
ofirgo Jan 2, 2025
34985e9
merge fix to export test
ofirgo Jan 2, 2025
20e25a0
one more test fix
ofirgo Jan 2, 2025
cebef33
fix import in keras tests
ofirgo Jan 2, 2025
620abd1
fix import in keras tests
ofirgo Jan 5, 2025
7dcedd2
fix const quantization test tpc
ofirgo Jan 5, 2025
151522d
change enum to str-enum and remove "value" in usage
ofirgo Jan 5, 2025
37c09bb
fix PR comments and modify opset names enum (change to str enum and r…
ofirgo Jan 5, 2025
26ae10e
align tests and use CustomOpsetLayers
ofirgo Jan 5, 2025
c280dd8
align tests and use CustomOpsetLayers
ofirgo Jan 5, 2025
c2016ea
align tests and use CustomOpsetLayers
ofirgo Jan 6, 2025
8040d69
minor last fixes
ofirgo Jan 6, 2025
7e757b7
minor last fixes
ofirgo Jan 6, 2025
51f8470
minor last fixes
ofirgo Jan 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
# limitations under the License.
# ==============================================================================

from dataclasses import dataclass, field
from dataclasses import dataclass
import math
from enum import Enum
from typing import Optional, Dict, Any, Tuple, List

from model_compression_toolkit import DefaultDict
from model_compression_toolkit.constants import MIN_THRESHOLD


Expand Down Expand Up @@ -86,6 +88,7 @@ class QuantizationConfig:
concat_threshold_update: bool = False
activation_bias_correction: bool = False
activation_bias_correction_threshold: float = 0.0
custom_tpc_opset_to_layer: Optional[Dict[str, Tuple[List[Any], Optional[Dict[str, DefaultDict]]]]] = None
ofirgo marked this conversation as resolved.
Show resolved Hide resolved


# Default quantization configuration the library use.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.constants import TENSORFLOW
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import compute_resource_utilization_data
from model_compression_toolkit.verify_packages import FOUND_TF
Expand All @@ -27,6 +28,8 @@
from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation
from tensorflow.keras.models import Model
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2keras import \
AttachTpcToKeras

from model_compression_toolkit import get_target_platform_capabilities

Expand All @@ -36,7 +39,7 @@ def keras_resource_utilization_data(in_model: Model,
representative_data_gen: Callable,
core_config: CoreConfig = CoreConfig(
mixed_precision_config=MixedPrecisionQuantizationConfig()),
target_platform_capabilities: TargetPlatformCapabilities = KERAS_DEFAULT_TPC) -> ResourceUtilization:
target_platform_capabilities: TargetPlatformModel = KERAS_DEFAULT_TPC) -> ResourceUtilization:
"""
Computes resource utilization data that can be used to calculate the desired target resource utilization
for mixed-precision quantization.
Expand Down Expand Up @@ -78,6 +81,12 @@ def keras_resource_utilization_data(in_model: Model,

fw_impl = KerasImplementation()

# Attach tpc model to framework
attach2keras = AttachTpcToKeras()
ofirgo marked this conversation as resolved.
Show resolved Hide resolved
target_platform_capabilities = attach2keras.attach(
target_platform_capabilities,
custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer)

return compute_resource_utilization_data(in_model,
representative_data_gen,
core_config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,21 @@

from model_compression_toolkit.logger import Logger
from model_compression_toolkit.constants import PYTORCH
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import compute_resource_utilization_data
from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig
from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
from model_compression_toolkit.verify_packages import FOUND_TORCH

if FOUND_TORCH:
from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
from torch.nn import Module
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2pytorch import \
AttachTpcToPytorch

from model_compression_toolkit import get_target_platform_capabilities

Expand All @@ -39,7 +41,7 @@
def pytorch_resource_utilization_data(in_model: Module,
representative_data_gen: Callable,
core_config: CoreConfig = CoreConfig(),
target_platform_capabilities: TargetPlatformCapabilities = PYTORCH_DEFAULT_TPC
target_platform_capabilities: TargetPlatformModel= PYTORCH_DEFAULT_TPC
) -> ResourceUtilization:
"""
Computes resource utilization data that can be used to calculate the desired target resource utilization for mixed-precision quantization.
Expand Down Expand Up @@ -80,6 +82,12 @@ def pytorch_resource_utilization_data(in_model: Module,

fw_impl = PytorchImplementation()

# Attach tpc model to framework
attach2pytorch = AttachTpcToPytorch()
ofirgo marked this conversation as resolved.
Show resolved Hide resolved
target_platform_capabilities = (
attach2pytorch.attach(target_platform_capabilities,
custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer))

return compute_resource_utilization_data(in_model,
representative_data_gen,
core_config,
Expand Down
11 changes: 10 additions & 1 deletion model_compression_toolkit/gptq/keras/quantization_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
LR_BIAS_DEFAULT, GPTQ_MOMENTUM, REG_DEFAULT_SLA
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.constants import TENSORFLOW, ACT_HESSIAN_DEFAULT_BATCH_SIZE, GPTQ_HESSIAN_NUM_SAMPLES
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
from model_compression_toolkit.verify_packages import FOUND_TF
from model_compression_toolkit.core.common.user_info import UserInformation
from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, GPTQHessianScoresConfig, \
Expand All @@ -47,6 +48,8 @@
from model_compression_toolkit.exporter.model_wrapper import get_exportable_keras_model
from model_compression_toolkit import get_target_platform_capabilities
from mct_quantizers.keras.metadata import add_metadata
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2keras import \
AttachTpcToKeras

# As from TF2.9 optimizers package is changed
if version.parse(tf.__version__) < version.parse("2.9"):
Expand Down Expand Up @@ -152,7 +155,7 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da
gptq_representative_data_gen: Callable = None,
target_resource_utilization: ResourceUtilization = None,
core_config: CoreConfig = CoreConfig(),
target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> Tuple[Model, UserInformation]:
target_platform_capabilities: TargetPlatformModel = DEFAULT_KERAS_TPC) -> Tuple[Model, UserInformation]:
"""
Quantize a trained Keras model using post-training quantization. The model is quantized using a
symmetric constraint quantization thresholds (power of two).
Expand Down Expand Up @@ -237,6 +240,12 @@ def keras_gradient_post_training_quantization(in_model: Model, representative_da

fw_impl = GPTQKerasImplemantation()

# Attach tpc model to framework
attach2keras = AttachTpcToKeras()
target_platform_capabilities = attach2keras.attach(
target_platform_capabilities,
custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer)

tg, bit_widths_config, hessian_info_service, scheduling_info = core_runner(in_model=in_model,
representative_data_gen=representative_data_gen,
core_config=core_config,
Expand Down
11 changes: 10 additions & 1 deletion model_compression_toolkit/gptq/pytorch/quantization_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from model_compression_toolkit.gptq.runner import gptq_runner
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.metadata import create_model_metadata
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
from model_compression_toolkit.verify_packages import FOUND_TORCH

Expand All @@ -47,6 +48,9 @@
from torch.optim import Adam, Optimizer
from model_compression_toolkit import get_target_platform_capabilities
from mct_quantizers.pytorch.metadata import add_metadata
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2pytorch import \
AttachTpcToPytorch

DEFAULT_PYTORCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)

def get_pytorch_gptq_config(n_epochs: int,
Expand Down Expand Up @@ -140,7 +144,7 @@ def pytorch_gradient_post_training_quantization(model: Module,
core_config: CoreConfig = CoreConfig(),
gptq_config: GradientPTQConfig = None,
gptq_representative_data_gen: Callable = None,
target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC):
target_platform_capabilities: TargetPlatformModel = DEFAULT_PYTORCH_TPC):
"""
Quantize a trained Pytorch module using post-training quantization.
By default, the module is quantized using a symmetric constraint quantization thresholds
Expand Down Expand Up @@ -209,6 +213,11 @@ def pytorch_gradient_post_training_quantization(model: Module,

fw_impl = GPTQPytorchImplemantation()

# Attach tpc model to framework
attach2pytorch = AttachTpcToPytorch()
ofirgo marked this conversation as resolved.
Show resolved Hide resolved
target_platform_capabilities = attach2pytorch.attach(target_platform_capabilities,
core_config.quantization_config.custom_tpc_opset_to_layer)

# ---------------------- #
# Core Runner
# ---------------------- #
Expand Down
10 changes: 8 additions & 2 deletions model_compression_toolkit/pruning/keras/pruning_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

from model_compression_toolkit import get_target_platform_capabilities
from model_compression_toolkit.constants import TENSORFLOW
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
from model_compression_toolkit.verify_packages import FOUND_TF
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
from model_compression_toolkit.core.common.pruning.pruner import Pruner
from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig
from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig
from model_compression_toolkit.core.common.quantization.set_node_quantization_config import set_quantization_configuration_to_graph
from model_compression_toolkit.core.graph_prep_runner import read_model_to_graph
from model_compression_toolkit.logger import Logger
Expand All @@ -35,14 +35,16 @@
from model_compression_toolkit.core.keras.pruning.pruning_keras_implementation import PruningKerasImplementation
from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
from tensorflow.keras.models import Model
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2keras import \
AttachTpcToKeras

DEFAULT_KERAS_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)

def keras_pruning_experimental(model: Model,
target_resource_utilization: ResourceUtilization,
representative_data_gen: Callable,
pruning_config: PruningConfig = PruningConfig(),
target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> Tuple[Model, PruningInfo]:
target_platform_capabilities: TargetPlatformModel = DEFAULT_KERAS_TPC) -> Tuple[Model, PruningInfo]:
"""
Perform structured pruning on a Keras model to meet a specified target resource utilization.
This function prunes the provided model according to the target resource utilization by grouping and pruning
Expand Down Expand Up @@ -111,6 +113,10 @@ def keras_pruning_experimental(model: Model,
# Instantiate the Keras framework implementation.
fw_impl = PruningKerasImplementation()

# Attach tpc model to framework
attach2keras = AttachTpcToKeras()
target_platform_capabilities = attach2keras.attach(target_platform_capabilities)

# Convert the original Keras model to an internal graph representation.
float_graph = read_model_to_graph(model,
representative_data_gen,
Expand Down
10 changes: 8 additions & 2 deletions model_compression_toolkit/pruning/pytorch/pruning_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
from typing import Callable, Tuple
from model_compression_toolkit import get_target_platform_capabilities
from model_compression_toolkit.constants import PYTORCH
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
from model_compression_toolkit.verify_packages import FOUND_TORCH
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
from model_compression_toolkit.core.common.pruning.pruner import Pruner
from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig
from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig
from model_compression_toolkit.core.common.quantization.set_node_quantization_config import set_quantization_configuration_to_graph
from model_compression_toolkit.core.graph_prep_runner import read_model_to_graph
from model_compression_toolkit.logger import Logger
Expand All @@ -38,6 +38,8 @@
PruningPytorchImplementation
from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
from torch.nn import Module
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2pytorch import \
AttachTpcToPytorch

# Set the default Target Platform Capabilities (TPC) for PyTorch.
DEFAULT_PYOTRCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)
Expand All @@ -46,7 +48,7 @@ def pytorch_pruning_experimental(model: Module,
target_resource_utilization: ResourceUtilization,
representative_data_gen: Callable,
pruning_config: PruningConfig = PruningConfig(),
target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYOTRCH_TPC) -> \
target_platform_capabilities: TargetPlatformModel = DEFAULT_PYOTRCH_TPC) -> \
Tuple[Module, PruningInfo]:
"""
Perform structured pruning on a Pytorch model to meet a specified target resource utilization.
Expand Down Expand Up @@ -117,6 +119,10 @@ def pytorch_pruning_experimental(model: Module,
# Instantiate the Pytorch framework implementation.
fw_impl = PruningPytorchImplementation()

# Attach TPC to framework
attach2pytorch = AttachTpcToPytorch()
target_platform_capabilities = attach2pytorch.attach(target_platform_capabilities)

# Convert the original Pytorch model to an internal graph representation.
float_graph = read_model_to_graph(model,
representative_data_gen,
Expand Down
11 changes: 10 additions & 1 deletion model_compression_toolkit/ptq/keras/quantization_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.constants import TENSORFLOW
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
from model_compression_toolkit.verify_packages import FOUND_TF
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
Expand All @@ -41,14 +42,17 @@

from model_compression_toolkit import get_target_platform_capabilities
from mct_quantizers.keras.metadata import add_metadata
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2keras import \
AttachTpcToKeras

DEFAULT_KERAS_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)


def keras_post_training_quantization(in_model: Model,
representative_data_gen: Callable,
target_resource_utilization: ResourceUtilization = None,
core_config: CoreConfig = CoreConfig(),
target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC):
target_platform_capabilities: TargetPlatformModel = DEFAULT_KERAS_TPC):
"""
Quantize a trained Keras model using post-training quantization. The model is quantized using a
symmetric constraint quantization thresholds (power of two).
Expand Down Expand Up @@ -134,6 +138,11 @@ def keras_post_training_quantization(in_model: Model,

fw_impl = KerasImplementation()

attach2keras = AttachTpcToKeras()
target_platform_capabilities = attach2keras.attach(
target_platform_capabilities,
custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer)

# Ignore returned hessian service as PTQ does not use it
tg, bit_widths_config, _, scheduling_info = core_runner(in_model=in_model,
representative_data_gen=representative_data_gen,
Expand Down
Loading
Loading