Skip to content

Commit

Permalink
Fix default for sequential updates (#186)
Browse files Browse the repository at this point in the history
* fix default

* update example

* update docstring

---------

Co-authored-by: Kyle Sayers <[email protected]>
  • Loading branch information
dsikka and Kyle Sayers authored Sep 19, 2024
1 parent 77f377b commit 3fb4212
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion examples/quantization_w4a16/llama3_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

# Select model and load it.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

model = SparseAutoModelForCausalLM.from_pretrained(
MODEL_ID,
device_map="auto",
Expand Down Expand Up @@ -54,7 +55,6 @@ def tokenize(sample):

# Configure the quantization algorithm to run.
# * quantize the weights to 4 bit with GPTQ with a group size 128
# Note: to reduce GPU memory use `sequential_update=False`
recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

# Apply algorithms.
Expand Down
6 changes: 3 additions & 3 deletions src/llmcompressor/modifiers/quantization/gptq/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class GPTQModifier(Modifier):
| test_stage:
| obcq_modifiers:
| GPTQModifier:
| sequential_update: True
| sequential_update: true
| dampening_frac: 0.001
| block_size: 128
| config_groups:
Expand All @@ -63,7 +63,7 @@ class GPTQModifier(Modifier):
:param sequential_update: Whether or not to update weights sequentially by layer,
True saves on GPU memory
True saves on GPU memory, default is True
:param targets: list of layer names to compress during GPTQ, or '__ALL__'
to compress every layer in the model
:param block_size: Used to determine number of columns to compress in one pass
Expand Down Expand Up @@ -93,7 +93,7 @@ class GPTQModifier(Modifier):
and activation 8 bit quantization on the Linear layers.
"""

sequential_update: Optional[bool] = False
sequential_update: Optional[bool] = True
targets: Union[str, List[str], None] = None
sequential_targets: Union[str, List[str], None] = None
block_size: int = 128
Expand Down

0 comments on commit 3fb4212

Please sign in to comment.