-
Notifications
You must be signed in to change notification settings - Fork 96
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* allow for SAM and AM support * adding tests
- Loading branch information
Sara Adkins
authored
Sep 3, 2024
1 parent
d32d287
commit fea3b2f
Showing
6 changed files
with
75 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3 changes: 3 additions & 0 deletions
3
tests/llmcompressor/transformers/compression/run_compressed_configs/fp8_dynamic.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
cadence: "commit" | ||
test_type: "regression" | ||
model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed" |
3 changes: 3 additions & 0 deletions
3
tests/llmcompressor/transformers/compression/run_compressed_configs/w4a16.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
cadence: "commit" | ||
test_type: "regression" | ||
model_stub: "nm-testing/tinyllama-w4a16-compressed" |
3 changes: 3 additions & 0 deletions
3
tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16_dense.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
cadence: "commit" | ||
test_type: "regression" | ||
model_stub: "nm-testing/tinyllama-w8a16-dense" |
3 changes: 3 additions & 0 deletions
3
tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
cadence: "commit" | ||
test_type: "regression" | ||
model_stub: "nm-testing/tinyllama-w8a8-compressed" |
59 changes: 59 additions & 0 deletions
59
tests/llmcompressor/transformers/compression/test_run_compressed.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import shutil | ||
import tempfile | ||
import unittest | ||
|
||
import torch | ||
from parameterized import parameterized_class | ||
from transformers import AutoTokenizer | ||
|
||
from llmcompressor.transformers import SparseAutoModelForCausalLM | ||
from tests.testing_utils import parse_params, requires_gpu, requires_torch | ||
|
||
CONFIG_DIR = "tests/llmcompressor/transformers/compression/run_compressed_configs" | ||
|
||
|
||
@requires_torch
@requires_gpu
@parameterized_class(parse_params(CONFIG_DIR))
class TestQuantizationMatches(unittest.TestCase):
    """Check that one checkpoint generates identical text in both load modes.

    The same ``model_stub`` is loaded twice through
    ``SparseAutoModelForCausalLM.from_pretrained``: once with
    ``run_compressed=True`` and once with ``run_compressed=False``. Both models
    then generate from the same prompts and the decoded strings are compared.

    The class is parameterized from ``parse_params(CONFIG_DIR)``; each parameter
    set is expected to supply ``model_stub`` (presumably read from the YAML
    files under ``CONFIG_DIR`` -- confirm against ``parse_params``).
    """

    # Placeholder; replaced per generated class by ``parameterized_class``.
    model_stub = None

    @classmethod
    def setUpClass(cls):
        """Load both model variants and the tokenizer once for the class."""
        # Scratch directory; removed again in tearDownClass.
        cls.test_dir = tempfile.mkdtemp()

        cls.compressed_model = SparseAutoModelForCausalLM.from_pretrained(
            cls.model_stub, torch_dtype="auto", device_map="auto", run_compressed=True
        )
        cls.uncompressed_model = SparseAutoModelForCausalLM.from_pretrained(
            cls.model_stub, torch_dtype="auto", device_map="auto", run_compressed=False
        )
        cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_stub)
        # Inputs are moved to the compressed model's device before generation.
        cls.device = cls.compressed_model.device

    def test_compressed_matches_uncompressed(self):
        """Both load modes must decode to the same text for every prompt."""
        prompts = [
            "I love 4-bit quantization because",
            "What is the capital of Paris?",
            "def fibonacci(n):",
        ]

        inputs = self.tokenizer(prompts, return_tensors="pt", padding=True).to(
            self.device
        )
        # NOTE(review): exact string equality only holds if generation is
        # deterministic; this relies on the checkpoint's default generation
        # settings -- confirm they use greedy decoding.
        compressed_output = self.tokenizer.batch_decode(
            self.compressed_model.generate(**inputs, max_length=50)
        )
        uncompressed_output = self.tokenizer.batch_decode(
            self.uncompressed_model.generate(**inputs, max_length=50)
        )

        # subTest reports every mismatching prompt (with a string diff from
        # assertEqual) instead of stopping at the first failing index.
        for prompt, compressed_text, uncompressed_text in zip(
            prompts, compressed_output, uncompressed_output
        ):
            with self.subTest(prompt=prompt):
                self.assertEqual(compressed_text, uncompressed_text)

    @classmethod
    def tearDownClass(cls):
        """Remove the scratch directory and release both models' GPU memory."""
        shutil.rmtree(cls.test_dir)
        # Drop the class references first so empty_cache() can actually
        # return the freed blocks.
        del cls.compressed_model
        del cls.uncompressed_model
        torch.cuda.empty_cache()