diff --git a/model_compression_toolkit/core/common/hessian/hessian_info_service.py b/model_compression_toolkit/core/common/hessian/hessian_info_service.py
index dd6106fac..9facd8afb 100644
--- a/model_compression_toolkit/core/common/hessian/hessian_info_service.py
+++ b/model_compression_toolkit/core/common/hessian/hessian_info_service.py
@@ -24,7 +24,7 @@ HessianScoresGranularity, HessianMode
 from model_compression_toolkit.logger import Logger
 
 if TYPE_CHECKING:
-    from model_compression_toolkit.core.common import BaseNode
+    from model_compression_toolkit.core.common import BaseNode  # pragma: no cover
 
 
 class HessianInfoService:
@@ -251,9 +251,9 @@ def compute_trackable_per_sample_hessian(self,
         hessian_score_by_image_hash = {}
 
         if not isinstance(inputs_batch, list):
-            raise TypeError('Expected a list of inputs')
+            raise TypeError('Expected a list of inputs')  # pragma: no cover
         if len(inputs_batch) > 1:
-            raise NotImplementedError('Per-sample hessian computation is not supported for networks with multiple inputs')
+            raise NotImplementedError('Per-sample hessian computation is not supported for networks with multiple inputs')  # pragma: no cover
 
         # Get the framework-specific calculator Hessian-approximation scores
         fw_hessian_calculator = self.fw_impl.get_hessian_scores_calculator(graph=self.graph,
diff --git a/model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py b/model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py
index c79af62ac..d596f8994 100644
--- a/model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py
+++ b/model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py
@@ -129,7 +129,7 @@ def compute(self) -> List[np.ndarray]:
             elif self.hessian_request.granularity == HessianScoresGranularity.PER_OUTPUT_CHANNEL:
                 hessian_scores = self._compute_per_channel(output, target_activation_tensors)
             else:
-                raise NotImplementedError(f'{self.hessian_request.granularity} is not supported')
+                raise NotImplementedError(f'{self.hessian_request.granularity} is not supported')  # pragma: no cover
 
             # Convert results to list of numpy arrays
             hessian_results = [torch_tensor_to_numpy(h) for h in hessian_scores]
diff --git a/model_compression_toolkit/gptq/pytorch/quantization_facade.py b/model_compression_toolkit/gptq/pytorch/quantization_facade.py
index 8022a5552..e065460fa 100644
--- a/model_compression_toolkit/gptq/pytorch/quantization_facade.py
+++ b/model_compression_toolkit/gptq/pytorch/quantization_facade.py
@@ -110,7 +110,8 @@ def get_pytorch_gptq_config(n_epochs: int,
 
     if use_hessian_sample_attention:
         if not use_hessian_based_weights:
-            raise ValueError('use_hessian_based_weights must be set to True in order to use Sample Layer Attention.')
+            raise ValueError('use_hessian_based_weights must be set to True in order to use Sample Layer Attention.')  # pragma: no cover
+
         hessian_weights_config = GPTQHessianScoresConfig(
             hessians_num_samples=None,
             norm_scores=False,
diff --git a/model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py b/model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py
index c8117e002..48645f896 100644
--- a/model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py
+++ b/model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py
@@ -56,7 +56,7 @@ def __call__(self, model: nn.Module, entropy_reg: float, layer_weights: torch.Te
         if layer_weights is None:
             layer_weights = torch.ones((len(layers),))
         if len(layer_weights.shape) != 1 or layer_weights.shape[0] != len(layers):
-            raise ValueError(f'Expected weights to be a vector of length {len(layers)}, received {layer_weights.shape}.')
+            raise ValueError(f'Expected weights to be a vector of length {len(layers)}, received {layer_weights.shape}.')  # pragma: no cover
         max_w = layer_weights.max()
 
         b = self.beta_scheduler(self.count_iter)
diff --git a/tests/pytorch_tests/model_tests/feature_models/gptq_test.py b/tests/pytorch_tests/model_tests/feature_models/gptq_test.py
index 552fff89c..b1ec5e2d0 100644
--- a/tests/pytorch_tests/model_tests/feature_models/gptq_test.py
+++ b/tests/pytorch_tests/model_tests/feature_models/gptq_test.py
@@ -21,6 +21,7 @@
 import mct_quantizers
 from model_compression_toolkit import DefaultDict
 from model_compression_toolkit.constants import GPTQ_HESSIAN_NUM_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianEstimationDistribution
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
 from model_compression_toolkit.gptq.common.gptq_constants import QUANT_PARAM_LEARNING_STR, MAX_LSB_STR
 from tests.pytorch_tests.model_tests.base_pytorch_feature_test import BasePytorchFeatureNetworkTest
@@ -59,7 +60,7 @@ def __init__(self, unit_test, weights_bits=8, weights_quant_method=QuantizationM
                  hessian_weights=True, norm_scores=True, log_norm_weights=True, scaled_log_norm=False,
                  params_learning=True, num_calibration_iter=GPTQ_HESSIAN_NUM_SAMPLES,
                  gradual_activation_quantization=False, hessian_num_samples=GPTQ_HESSIAN_NUM_SAMPLES, sample_layer_attention=False,
-                 loss=multiple_tensors_mse_loss, hessian_batch_size=1):
+                 loss=multiple_tensors_mse_loss, hessian_batch_size=1, estimator_distribution=HessianEstimationDistribution.GAUSSIAN):
         super().__init__(unit_test, input_shape=(3, 16, 16), num_calibration_iter=num_calibration_iter)
         self.seed = 0
         self.rounding_type = rounding_type
@@ -78,6 +79,7 @@ def __init__(self, unit_test, weights_bits=8, weights_quant_method=QuantizationM
         self.sample_layer_attention = sample_layer_attention
         self.loss = loss
         self.hessian_batch_size = hessian_batch_size
+        self.estimator_distribution = estimator_distribution
 
     def get_quantization_config(self):
         return mct.core.QuantizationConfig(mct.core.QuantizationErrorMethod.NOCLIPPING,
@@ -154,7 +156,8 @@ def get_gptq_config(self):
                                               norm_scores=self.norm_scores,
                                               per_sample=self.sample_layer_attention,
                                               hessians_num_samples=self.hessian_num_samples,
-                                              hessian_batch_size=self.hessian_batch_size),
+                                              hessian_batch_size=self.hessian_batch_size,
+                                              estimator_distribution=self.estimator_distribution),
                              gptq_quantizer_params_override=self.override_params,
diff --git a/tests/pytorch_tests/model_tests/test_feature_models_runner.py b/tests/pytorch_tests/model_tests/test_feature_models_runner.py
index 29e228d9b..8eface3cd 100644
--- a/tests/pytorch_tests/model_tests/test_feature_models_runner.py
+++ b/tests/pytorch_tests/model_tests/test_feature_models_runner.py
@@ -21,6 +21,7 @@
 import torch
 from torch import nn
 import model_compression_toolkit as mct
+from model_compression_toolkit.core.common.hessian import HessianEstimationDistribution
 from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting
 from model_compression_toolkit.core.common.network_editors import NodeTypeFilter, NodeNameFilter
 from model_compression_toolkit.gptq.common.gptq_config import RoundingType
@@ -658,6 +659,7 @@ def test_gptq_with_gradual_activation(self):
     def test_gptq_with_sample_layer_attention(self):
         kwargs = dict(sample_layer_attention=True, loss=sample_layer_attention_loss, hessian_weights=True,
                       hessian_num_samples=None,
+                      estimator_distribution=HessianEstimationDistribution.RADEMACHER,
                       norm_scores=False, log_norm_weights=False, scaled_log_norm=False)
         GPTQAccuracyTest(self, **kwargs).run_test()
         GPTQAccuracyTest(self, hessian_batch_size=16, rounding_type=RoundingType.SoftQuantizer, **kwargs).run_test()
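
A minimal usage sketch (not part of the diff): it exercises the Sample Layer Attention path whose error branches are marked with the new pragmas, and switches the Hessian estimator to the Rademacher distribution as the updated test_gptq_with_sample_layer_attention does. The public mct.gptq facade calls below are taken from this repository; float_model and representative_data_gen are user-supplied placeholders, and overriding estimator_distribution after construction assumes GPTQHessianScoresConfig exposes the field the tests pass.

    import model_compression_toolkit as mct
    from model_compression_toolkit.core.common.hessian import HessianEstimationDistribution

    # use_hessian_based_weights must stay True; otherwise get_pytorch_gptq_config
    # raises the ValueError now marked '# pragma: no cover' above.
    gptq_config = mct.gptq.get_pytorch_gptq_config(
        n_epochs=5,
        use_hessian_based_weights=True,
        use_hessian_sample_attention=True)

    # Assumption: the facade stores the GPTQHessianScoresConfig it builds on
    # hessian_weights_config, so the estimator distribution can be overridden here.
    gptq_config.hessian_weights_config.estimator_distribution = HessianEstimationDistribution.RADEMACHER

    quantized_model, quantization_info = mct.gptq.pytorch_gradient_post_training_quantization(
        model=float_model,
        representative_data_gen=representative_data_gen,
        gptq_config=gptq_config)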