|
97 | 97 | to_weight_tensor_with_linear_activation_quantization_metadata, |
98 | 98 | ) |
99 | 99 | from torchao.utils import ( |
100 | | - _ConfigDeprecationWrapper, |
101 | 100 | is_MI300, |
102 | 101 | is_sm_at_least_89, |
103 | 102 | is_sm_at_least_90, |
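
This hunk drops the `_ConfigDeprecationWrapper` import; every wrapper instance it fed is deleted in the hunks below. For context, here is a minimal sketch of what such a deprecation shim typically looks like — the constructor signature and warning text are assumptions for illustration, not torchao's actual implementation:

```python
import warnings


class _ConfigDeprecationWrapper:
    """Hypothetical sketch: keeps an old function-style name working
    while pointing callers at the config class that replaces it."""

    def __init__(self, deprecated_name, config_cls):
        self._deprecated_name = deprecated_name
        self._config_cls = config_cls

    def __call__(self, *args, **kwargs):
        # Warn under the old name, then construct the new config.
        warnings.warn(
            f"`{self._deprecated_name}` is deprecated, use "
            f"`{self._config_cls.__name__}` instead",
            DeprecationWarning,
        )
        return self._config_cls(*args, **kwargs)
```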
|
146 | 145 | "autoquant", |
147 | 146 | "_get_subclass_inserter", |
148 | 147 | "quantize_", |
149 | | - "int8_dynamic_activation_int4_weight", |
150 | | - "int8_dynamic_activation_int8_weight", |
151 | | - "int8_dynamic_activation_int8_semi_sparse_weight", |
152 | | - "int4_weight_only", |
153 | | - "int8_weight_only", |
154 | 148 | "intx_quantization_aware_training", |
155 | | - "float8_weight_only", |
156 | | - "uintx_weight_only", |
157 | | - "fpx_weight_only", |
158 | | - "gemlite_uintx_weight_only", |
159 | | - "float8_dynamic_activation_float8_weight", |
160 | | - "float8_static_activation_float8_weight", |
161 | 149 | "Int8DynActInt4WeightQuantizer", |
162 | 150 | "Float8DynamicActivationFloat8SemiSparseWeightConfig", |
163 | 151 | "ModuleFqnToConfig", |
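
Each name dropped from `__all__` aliased a config class that stays exported; the pairing below is read off the wrapper deletions later in this diff (the dict name is illustrative only, not part of the change). `int8_dynamic_activation_int8_semi_sparse_weight` is also removed, but its replacement does not appear in the hunks shown here.

```python
# Illustrative summary: removed alias -> surviving config class.
_REMOVED_ALIASES = {
    "int8_dynamic_activation_int4_weight": "Int8DynamicActivationInt4WeightConfig",
    "int4_dynamic_activation_int4_weight": "Int4DynamicActivationInt4WeightConfig",
    "gemlite_uintx_weight_only": "GemliteUIntXWeightOnlyConfig",
    "int4_weight_only": "Int4WeightOnlyConfig",
    "int8_weight_only": "Int8WeightOnlyConfig",
    "int8_dynamic_activation_int8_weight": "Int8DynamicActivationInt8WeightConfig",
    "float8_weight_only": "Float8WeightOnlyConfig",
    "float8_dynamic_activation_float8_weight": "Float8DynamicActivationFloat8WeightConfig",
    "float8_static_activation_float8_weight": "Float8StaticActivationFloat8WeightConfig",
    "uintx_weight_only": "UIntXWeightOnlyConfig",
    "fpx_weight_only": "FPXWeightOnlyConfig",
}
```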
@@ -464,7 +452,7 @@ def quantize_( |
464 | 452 | # Int8DynamicActivationInt8WeightConfig (optimized with int8 mm op and torch.compile) |
465 | 453 | # Int4WeightOnlyConfig (optimized with int4 tinygemm kernel and torch.compile) |
466 | 454 | # Int8WeightOnlyConfig (optimized with int8 mm op and torch.compile)
467 | | - from torchao.quantization.quant_api import int4_weight_only |
| 455 | + from torchao.quantization.quant_api import Int4WeightOnlyConfig |
468 | 456 |
|
469 | 457 | m = nn.Sequential(nn.Linear(32, 1024), nn.Linear(1024, 32)) |
470 | 458 | quantize_(m, Int4WeightOnlyConfig(group_size=32, version=1)) |
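
Callers still on the removed function-style aliases need a one-line migration. The model setup below mirrors the docstring above; the old call is shown only for contrast and no longer works after this change:

```python
import torch.nn as nn

from torchao.quantization.quant_api import Int4WeightOnlyConfig, quantize_

m = nn.Sequential(nn.Linear(32, 1024), nn.Linear(1024, 32))

# Before this change (deprecated alias, now removed):
#   quantize_(m, int4_weight_only(group_size=32))

# After: construct the config class directly.
quantize_(m, Int4WeightOnlyConfig(group_size=32, version=1))
```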
@@ -596,12 +584,6 @@ def __post_init__(self): |
596 | 584 | ) |
597 | 585 |
|
598 | 586 |
|
599 | | -# for BC |
600 | | -int8_dynamic_activation_int4_weight = _ConfigDeprecationWrapper( |
601 | | - "int8_dynamic_activation_int4_weight", Int8DynamicActivationInt4WeightConfig |
602 | | -) |
603 | | - |
604 | | - |
605 | 587 | @register_quantize_module_handler(Int8DynamicActivationInt4WeightConfig) |
606 | 588 | def _int8_dynamic_activation_int4_weight_transform( |
607 | 589 | module: torch.nn.Module, |
@@ -970,12 +952,6 @@ def __post_init__(self): |
970 | 952 | ) |
971 | 953 |
|
972 | 954 |
|
973 | | -# for bc |
974 | | -int4_dynamic_activation_int4_weight = _ConfigDeprecationWrapper( |
975 | | - "int4_dynamic_activation_int4_weight", Int4DynamicActivationInt4WeightConfig |
976 | | -) |
977 | | - |
978 | | - |
979 | 955 | @register_quantize_module_handler(Int4DynamicActivationInt4WeightConfig) |
980 | 956 | def _int4_dynamic_activation_int4_weight_transform( |
981 | 957 | module: torch.nn.Module, config: Int4DynamicActivationInt4WeightConfig |
@@ -1036,12 +1012,6 @@ def __post_init__(self): |
1036 | 1012 | ) |
1037 | 1013 |
|
1038 | 1014 |
|
1039 | | -# for BC |
1040 | | -gemlite_uintx_weight_only = _ConfigDeprecationWrapper( |
1041 | | - "gemlite_uintx_weight_only", GemliteUIntXWeightOnlyConfig |
1042 | | -) |
1043 | | - |
1044 | | - |
1045 | 1015 | @register_quantize_module_handler(GemliteUIntXWeightOnlyConfig) |
1046 | 1016 | def _gemlite_uintx_weight_only_transform( |
1047 | 1017 | module: torch.nn.Module, config: GemliteUIntXWeightOnlyConfig |
@@ -1119,11 +1089,6 @@ def __post_init__(self): |
1119 | 1089 | torch._C._log_api_usage_once("torchao.quantization.Int4WeightOnlyConfig") |
1120 | 1090 |
|
1121 | 1091 |
|
1122 | | -# for BC |
1123 | | -# TODO maybe change other callsites |
1124 | | -int4_weight_only = _ConfigDeprecationWrapper("int4_weight_only", Int4WeightOnlyConfig) |
1125 | | - |
1126 | | - |
1127 | 1092 | def _int4_weight_only_quantize_tensor(weight, config): |
1128 | 1093 | # TODO(future PR): perhaps move this logic to a different file, to keep the API |
1129 | 1094 | # file clean of implementation details |
@@ -1335,10 +1300,6 @@ def __post_init__(self): |
1335 | 1300 | torch._C._log_api_usage_once("torchao.quantization.Int8WeightOnlyConfig") |
1336 | 1301 |
|
1337 | 1302 |
|
1338 | | -# for BC |
1339 | | -int8_weight_only = _ConfigDeprecationWrapper("int8_weight_only", Int8WeightOnlyConfig) |
1340 | | - |
1341 | | - |
1342 | 1303 | def _int8_weight_only_quantize_tensor(weight, config): |
1343 | 1304 | mapping_type = MappingType.SYMMETRIC |
1344 | 1305 | target_dtype = torch.int8 |
@@ -1503,12 +1464,6 @@ def __post_init__(self): |
1503 | 1464 | ) |
1504 | 1465 |
|
1505 | 1466 |
|
1506 | | -# for BC |
1507 | | -int8_dynamic_activation_int8_weight = _ConfigDeprecationWrapper( |
1508 | | - "int8_dynamic_activation_int8_weight", Int8DynamicActivationInt8WeightConfig |
1509 | | -) |
1510 | | - |
1511 | | - |
1512 | 1467 | def _int8_dynamic_activation_int8_weight_quantize_tensor(weight, config): |
1513 | 1468 | layout = config.layout |
1514 | 1469 | act_mapping_type = config.act_mapping_type |
@@ -1614,12 +1569,6 @@ def __post_init__(self): |
1614 | 1569 | torch._C._log_api_usage_once("torchao.quantization.Float8WeightOnlyConfig") |
1615 | 1570 |
|
1616 | 1571 |
|
1617 | | -# for BC |
1618 | | -float8_weight_only = _ConfigDeprecationWrapper( |
1619 | | - "float8_weight_only", Float8WeightOnlyConfig |
1620 | | -) |
1621 | | - |
1622 | | - |
1623 | 1572 | def _float8_weight_only_quant_tensor(weight, config): |
1624 | 1573 | if config.version == 1: |
1625 | 1574 | warnings.warn( |
@@ -1797,12 +1746,6 @@ def __post_init__(self): |
1797 | 1746 | self.mm_config = Float8MMConfig(use_fast_accum=default_use_fast_accum) |
1798 | 1747 |
|
1799 | 1748 |
|
1800 | | -# for bc |
1801 | | -float8_dynamic_activation_float8_weight = _ConfigDeprecationWrapper( |
1802 | | - "float8_dynamic_activation_float8_weight", Float8DynamicActivationFloat8WeightConfig |
1803 | | -) |
1804 | | - |
1805 | | - |
1806 | 1749 | def _float8_dynamic_activation_float8_weight_quantize_tensor(weight, config): |
1807 | 1750 | activation_dtype = config.activation_dtype |
1808 | 1751 | weight_dtype = config.weight_dtype |
@@ -2005,12 +1948,6 @@ def __post_init__(self): |
2005 | 1948 | ) |
2006 | 1949 |
|
2007 | 1950 |
|
2008 | | -# for bc |
2009 | | -float8_static_activation_float8_weight = _ConfigDeprecationWrapper( |
2010 | | - "float8_static_activation_float8_weight", Float8StaticActivationFloat8WeightConfig |
2011 | | -) |
2012 | | - |
2013 | | - |
2014 | 1951 | @register_quantize_module_handler(Float8StaticActivationFloat8WeightConfig) |
2015 | 1952 | def _float8_static_activation_float8_weight_transform( |
2016 | 1953 | module: torch.nn.Module, config: Float8StaticActivationFloat8WeightConfig |
@@ -2096,12 +2033,6 @@ def __post_init__(self): |
2096 | 2033 | ) |
2097 | 2034 |
|
2098 | 2035 |
|
2099 | | -# for BC |
2100 | | -uintx_weight_only = _ConfigDeprecationWrapper( |
2101 | | - "uintx_weight_only", UIntXWeightOnlyConfig |
2102 | | -) |
2103 | | - |
2104 | | - |
2105 | 2036 | @register_quantize_module_handler(UIntXWeightOnlyConfig) |
2106 | 2037 | def _uintx_weight_only_transform( |
2107 | 2038 | module: torch.nn.Module, config: UIntXWeightOnlyConfig |
@@ -2383,10 +2314,6 @@ def __post_init__(self): |
2383 | 2314 | ) |
2384 | 2315 |
|
2385 | 2316 |
|
2386 | | -# for BC |
2387 | | -fpx_weight_only = _ConfigDeprecationWrapper("fpx_weight_only", FPXWeightOnlyConfig) |
2388 | | - |
2389 | | - |
2390 | 2317 | @register_quantize_module_handler(FPXWeightOnlyConfig) |
2391 | 2318 | def _fpx_weight_only_transform( |
2392 | 2319 | module: torch.nn.Module, config: FPXWeightOnlyConfig |
|