# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
End-to-end tests for asymmetric quantization with zero-point decompression.
"""

import pytest
import torch
from compressed_tensors.compressors.model_compressors.model_compressor import (
    ModelCompressor,
)
from compressed_tensors.config import CompressionFormat
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
    QuantizationStrategy,
    apply_quantization_config,
)
from torch.nn import Linear, Module


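# A minimal reference for how asymmetric quantization round-trips a weight
# (a readability sketch, not the library's implementation; the exact clamping
# range and packed layout used by compressed-tensors may differ):
#     q  = clamp(round(w / scale) + zero_point, q_min, q_max)
#     w' = (q - zero_point) * scale
def _reference_asym_roundtrip(w, scale, zero_point, num_bits=4):
    """Quantize and dequantize ``w`` with an unsigned asymmetric scheme."""
    q_min, q_max = 0, 2**num_bits - 1
    q = torch.clamp(torch.round(w / scale) + zero_point, q_min, q_max)
    return (q - zero_point) * scale

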
class SimpleModel(Module):
    """Simple model for testing"""

    def __init__(self, input_dim=512, hidden_dim=256, output_dim=128):
        super().__init__()
        self.layer1 = Linear(input_dim, hidden_dim, bias=False)
        self.layer2 = Linear(hidden_dim, output_dim, bias=False)

    def forward(self, x):
        x = self.layer1(x)
        x = torch.relu(x)
        x = self.layer2(x)
        return x


def create_asymmetric_quant_config(
    num_bits=4, strategy=QuantizationStrategy.GROUP, group_size=128
) -> QuantizationConfig:
    """Create an asymmetric quantization config"""
    config_groups = {
        "group_1": QuantizationScheme(
            targets=["Linear"],
            weights=QuantizationArgs(
                num_bits=num_bits,
                strategy=strategy.value,
                group_size=(
                    group_size if strategy == QuantizationStrategy.GROUP else None
                ),
                symmetric=False,
            ),
        ),
    }
    return QuantizationConfig(config_groups=config_groups)


@pytest.mark.parametrize(
    "strategy,group_size",
    [
        (QuantizationStrategy.GROUP, 128),
        (QuantizationStrategy.CHANNEL, None),
    ],
)
def test_end_to_end_asymmetric_quantization(
    strategy,
    group_size,
    mock_per_group_calibration,
    mock_per_channel_calibration,
):
    """
    Test the end-to-end workflow: quantize -> compress -> decompress in memory.
    """
    model = SimpleModel()
    original_weights = {
        "layer1": model.layer1.weight.detach().clone(),
        "layer2": model.layer2.weight.detach().clone(),
    }

    quant_config = create_asymmetric_quant_config(
        num_bits=4, strategy=strategy, group_size=group_size
    )
    # Set pack-quantized format for ModelCompressor usage
    quant_config.format = CompressionFormat.pack_quantized.value
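    # apply_quantization_config is expected to attach quantization parameters
    # (scale and zero-point) to each matching Linear module; the calibration
    # mocks below then fill them in with values derived from the weights.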
    apply_quantization_config(model, quant_config)

    if strategy == QuantizationStrategy.GROUP:
        mock_per_group_calibration(
            model.layer1, "weight", model.layer1.weight, group_size
        )
        mock_per_group_calibration(
            model.layer2, "weight", model.layer2.weight, group_size
        )
    else:
        mock_per_channel_calibration(model.layer1, "weight", model.layer1.weight)
        mock_per_channel_calibration(model.layer2, "weight", model.layer2.weight)

    # Compress and decompress in memory using ModelCompressor
    mc = ModelCompressor(quantization_config=quant_config)
    mc.compress_model(model)

    # Verify compression created zero-point parameters
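    # (The int32 dtype below is tied to the pack_quantized layout, which packs
    # sub-byte values into 32-bit words; this assumes compressed-tensors stores
    # the asymmetric zero-points in the same packed-int32 form.)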
    assert hasattr(model.layer1, "weight_zero_point")
    assert hasattr(model.layer2, "weight_zero_point")
    assert model.layer1.weight_zero_point.dtype == torch.int32
    assert model.layer2.weight_zero_point.dtype == torch.int32

    # Decompress in memory
    mc.decompress_model(model)

    # Verify decompression restored weights correctly
    assert model.layer1.weight.shape == original_weights["layer1"].shape
    assert model.layer2.weight.shape == original_weights["layer2"].shape
    assert model.layer1.weight.dtype.is_floating_point
    assert model.layer2.weight.dtype.is_floating_point
    assert not torch.isnan(model.layer1.weight).any()
    assert not torch.isnan(model.layer2.weight).any()
    assert not torch.isinf(model.layer1.weight).any()
    assert not torch.isinf(model.layer2.weight).any()


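# Why biased weights below: with values centered near +2.0, a symmetric scheme
# must cover [-max|w|, +max|w|] and wastes roughly half of its codes on values
# that never occur, while the asymmetric zero-point maps [min(w), max(w)]
# directly onto the integer range (see _reference_asym_roundtrip above).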
@pytest.mark.parametrize("num_bits", [4, 8])
def test_asymmetric_quantization_accuracy(num_bits, mock_per_group_calibration):
    """
    Test that asymmetric quantization with zero-point reconstructs a biased
    weight distribution with low error after a compress/decompress round trip.
    """
    shape = (256, 512)
    biased_weights = torch.randn(shape) + 2.0

    quant_config = create_asymmetric_quant_config(
        num_bits=num_bits,
        strategy=QuantizationStrategy.GROUP,
        group_size=128,
    )
    quant_config.format = CompressionFormat.pack_quantized.value

    class SingleLayer(Module):
        # No forward is needed: the test only exercises the layer's weight.
        def __init__(self):
            super().__init__()
            self.layer = Linear(shape[1], shape[0], bias=False)

    model = SingleLayer()
    apply_quantization_config(model, quant_config)

    with torch.no_grad():
        model.layer.weight.copy_(biased_weights)
    mock_per_group_calibration(model.layer, "weight", model.layer.weight, 128)

    # Compress and decompress in memory using ModelCompressor
    mc = ModelCompressor(quantization_config=quant_config)
    mc.compress_model(model)
    mc.decompress_model(model)

    decompressed_weights = model.layer.weight
    assert decompressed_weights.shape == shape
    assert not torch.isnan(decompressed_weights).any()
    assert not torch.isinf(decompressed_weights).any()
    # Loose sanity bound: the std of the difference of two independent U(0, 1)
    # samples is about 0.41, well above the expected group-wise quantization
    # error for 4- and 8-bit asymmetric schemes on this distribution.
    threshold = torch.std(torch.rand(shape) - torch.rand(shape))
    assert torch.std(biased_weights - decompressed_weights) < threshold