Skip to content

Commit ba8563c

Browse files
authored
GPTQ add arXiv link, move file location (#1100)
## Purpose ## * Better docstring for GPTQ * Reduce unnecessary file hierarchy Signed-off-by: Kyle Sayers <[email protected]>
1 parent a76563a commit ba8563c

File tree

3 files changed

+4
-5
lines changed

3 files changed

+4
-5
lines changed

src/llmcompressor/modifiers/quantization/gptq/base.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from llmcompressor.core import State
1717
from llmcompressor.modifiers import Modifier, ModifierFactory
1818
from llmcompressor.modifiers.quantization.calibration import freeze_module_quantization
19-
from llmcompressor.modifiers.quantization.gptq.utils.gptq_quantize import (
19+
from llmcompressor.modifiers.quantization.gptq.gptq_quantize import (
2020
accumulate_hessian,
2121
make_empty_hessian,
2222
quantize_weight,
@@ -36,7 +36,9 @@
3636

3737
class GPTQModifier(Modifier, HooksMixin):
3838
"""
39-
Modifier for applying the one-shot OBCQ algorithm to a model
39+
Implements the GPTQ algorithm from https://arxiv.org/abs/2210.17323. This modifier
40+
uses activations to calibrate a Hessian matrix, which is then used to determine
41+
optimal quantization values and orderings for the model weights.
4042
4143
| Sample yaml:
4244
| test_stage:

src/llmcompressor/modifiers/quantization/gptq/utils/__init__.py

-3
This file was deleted.

0 commit comments

Comments
 (0)