apply style
Signed-off-by: Kyle Sayers <[email protected]>
kylesayrs committed Mar 10, 2025
1 parent fb6fef7 commit 4ba201b
Showing 16 changed files with 95 additions and 28 deletions.
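
Every hunk below applies the same mechanical edit: an over-long AutoModelForCausalLM.from_pretrained(...) call is wrapped to one keyword argument per line with a trailing comma, matching the repository's line-length style. A minimal sketch of the resulting pattern, assuming a hypothetical load_model helper; the reason for the use_safetensors guard (the "stories" test checkpoints appear to ship without safetensors weights) is inferred from the code, not stated in the commit:

from transformers import AutoModelForCausalLM


def load_model(stub: str):
    # Hypothetical helper showing the target style: one keyword per line,
    # trailing comma, closing parenthesis dedented onto its own line.
    # The guard disables safetensors loading for "stories" checkpoints,
    # which are assumed to provide pickled weights only.
    return AutoModelForCausalLM.from_pretrained(
        stub,
        use_safetensors=("stories" not in stub),
    )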
4 changes: 3 additions & 1 deletion tests/llmcompressor/entrypoints/test_oneshot.py
@@ -7,7 +7,9 @@
def test_oneshot_from_args():
# Select model and load it.
stub = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-    model = AutoModelForCausalLM.from_pretrained(stub, use_safetensors=("stories" not in stub))
+    model = AutoModelForCausalLM.from_pretrained(
+        stub, use_safetensors=("stories" not in stub)
+    )
dataset = "HuggingFaceH4/ultrachat_200k"

NUM_CALIBRATION_SAMPLES = 512
5 changes: 4 additions & 1 deletion tests/llmcompressor/observers/test_helpers.py
@@ -37,7 +37,10 @@ def _prep_for_input_quant_calibration(module: torch.nn.Module):


def test_get_observer_token_count():
-    model = AutoModelForCausalLM.from_pretrained("Isotonic/TinyMixtral-4x248M-MoE", use_safetensors=("stories" not in "Isotonic/TinyMixtral-4x248M-MoE"))
+    model = AutoModelForCausalLM.from_pretrained(
+        "Isotonic/TinyMixtral-4x248M-MoE",
+        use_safetensors=("stories" not in "Isotonic/TinyMixtral-4x248M-MoE"),
+    )
tokenizer = AutoTokenizer.from_pretrained("Isotonic/TinyMixtral-4x248M-MoE")
model.eval()
config = QuantizationConfig(
@@ -37,7 +37,10 @@ def setUpClass(cls):
cls.test_dir = tempfile.mkdtemp()

         cls.model = AutoModelForCausalLM.from_pretrained(
-            cls.model_stub, torch_dtype=cls.weight_dtype, device_map="cuda:0", use_safetensors=("stories" not in cls.model_stub)
+            cls.model_stub,
+            torch_dtype=cls.weight_dtype,
+            device_map="cuda:0",
+            use_safetensors=("stories" not in cls.model_stub),
         )
model = cls._run_oneshot(
cls.model,
@@ -147,7 +147,10 @@ def setUp(self):
self.output = "./oneshot_output"

         self.model = AutoModelForCausalLM.from_pretrained(
-            self.model, device_map=self.device, torch_dtype=torch.bfloat16, use_safetensors=("stories" not in self.model)
+            self.model,
+            device_map=self.device,
+            torch_dtype=torch.bfloat16,
+            use_safetensors=("stories" not in self.model),
         )

def test_oneshot_then_finetune_gpu(self):
@@ -96,7 +96,10 @@ def setUp(self):
self.output = "./finetune_output"

         self.model = AutoModelForCausalLM.from_pretrained(
-            self.model, device_map=self.device, torch_dtype=torch.bfloat16, use_safetensors=("stories" not in self.model)
+            self.model,
+            device_map=self.device,
+            torch_dtype=torch.bfloat16,
+            use_safetensors=("stories" not in self.model),
         )

def test_oneshot_then_finetune_gpu(self):
@@ -21,7 +21,9 @@ def setUp(self):
def test_oneshot_sparsification_then_finetune(self):
recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
         model = AutoModelForCausalLM.from_pretrained(
-            "Xenova/llama2.c-stories15M", device_map="auto", use_safetensors=("stories" not in "Xenova/llama2.c-stories15M")
+            "Xenova/llama2.c-stories15M",
+            device_map="auto",
+            use_safetensors=("stories" not in "Xenova/llama2.c-stories15M"),
         )
dataset = "open_platypus"
concatenate_data = False
@@ -52,7 +54,9 @@ def test_oneshot_sparsification_then_finetune(self):
use_safetensors=("stories" not in str(self.output / "oneshot_out")),
)
         distill_teacher = AutoModelForCausalLM.from_pretrained(
-            "Xenova/llama2.c-stories15M", device_map="auto", use_safetensors=("stories" not in "Xenova/llama2.c-stories15M")
+            "Xenova/llama2.c-stories15M",
+            device_map="auto",
+            use_safetensors=("stories" not in "Xenova/llama2.c-stories15M"),
         )
dataset = "open_platypus"
concatenate_data = False
@@ -76,7 +80,10 @@ def test_oneshot_sparsification_then_finetune(self):
# with the saved model
# Explictly decompress the model for training using quantization_config
         model = AutoModelForCausalLM.from_pretrained(
-            output_dir, device_map="auto", quantization_config=self.quantization_config, use_safetensors=("stories" not in str(output_dir))
+            output_dir,
+            device_map="auto",
+            quantization_config=self.quantization_config,
+            use_safetensors=("stories" not in str(output_dir)),
         )
with create_session():
train(
@@ -145,7 +152,10 @@ def test_oneshot_quantization_then_finetune(self):

# test reloading checkpoint and final model
         model = AutoModelForCausalLM.from_pretrained(
-            output_dir, device_map="auto", quantization_config=quantization_config, use_safetensors=("stories" not in str(output_dir))
+            output_dir,
+            device_map="auto",
+            quantization_config=quantization_config,
+            use_safetensors=("stories" not in str(output_dir)),
         )
with create_session():
train(
@@ -33,7 +33,9 @@ def __init__(
@pytest.mark.unit
def test_mixin_init():
model_state_path = "Xenova/llama2.c-stories15M"
-    model = AutoModelForCausalLM.from_pretrained(model_state_path, use_safetensors=("stories" not in model_state_path))
+    model = AutoModelForCausalLM.from_pretrained(
+        model_state_path, use_safetensors=("stories" not in model_state_path)
+    )
recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"

session_mixin = MixInTest(model=model, recipe=recipe)
@@ -46,7 +48,9 @@ def test_mixin_init():
@pytest.fixture
def mixin_trainer():
model_state_path = "Xenova/llama2.c-stories15M"
-    model = AutoModelForCausalLM.from_pretrained(model_state_path, use_safetensors=("stories" not in model_state_path))
+    model = AutoModelForCausalLM.from_pretrained(
+        model_state_path, use_safetensors=("stories" not in model_state_path)
+    )
recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"
train_dataset = "open-platypus"

4 changes: 3 additions & 1 deletion tests/llmcompressor/transformers/gptq/test_oneshot.py
@@ -80,7 +80,9 @@ def test_oneshot_application(self):
num_calibration_samples=9,
)
         model_loaded = AutoModelForCausalLM.from_pretrained(
-            self.output, device_map=self.device, use_safetensors=("stories" not in self.output)
+            self.output,
+            device_map=self.device,
+            use_safetensors=("stories" not in self.output),
         )

# Check that the model is quantized
12 changes: 9 additions & 3 deletions tests/llmcompressor/transformers/kv_cache/test_kv_cache.py
@@ -155,7 +155,9 @@ def test_kv_cache_model_state_dict_attr(oneshot_fixture, tmp_path):
model, used_args = next(oneshot_fixture(tmp_path))
output_dir = used_args["output_dir"]
with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained(str(output_dir), use_safetensors=("stories" not in str(output_dir)))
+        model = AutoModelForCausalLM.from_pretrained(
+            str(output_dir), use_safetensors=("stories" not in str(output_dir))
+        )

counts = 0
for name, submodule in iter_named_quantizable_modules(
@@ -196,7 +198,9 @@ def test_kv_cache_gptq_config_format(kv_cache_fixture, tmp_path):
assert kv_cache_scheme["symmetric"] == used_args["symmetric"]

with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained(output_dir, use_safetensors=("stories" not in output_dir))
+        model = AutoModelForCausalLM.from_pretrained(
+            output_dir, use_safetensors=("stories" not in output_dir)
+        )

counts = 0
for name, submodule in iter_named_quantizable_modules(
@@ -238,7 +242,9 @@ def test_kv_cache_gptq_model_state_dict_attr(kv_cache_fixture, tmp_path):
output_dir, _ = next(kv_cache_fixture(recipe, tmp_path))

with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained(output_dir, use_safetensors=("stories" not in output_dir))
+        model = AutoModelForCausalLM.from_pretrained(
+            output_dir, use_safetensors=("stories" not in output_dir)
+        )

counts = 0
for name, submodule in iter_named_quantizable_modules(
@@ -136,7 +136,10 @@ def setUp(self):

self.model_name = self.model
         self.model = AutoModelForCausalLM.from_pretrained(
-            self.model, device_map=self.device, torch_dtype=torch.bfloat16, use_safetensors=("stories" not in self.model)
+            self.model,
+            device_map=self.device,
+            torch_dtype=torch.bfloat16,
+            use_safetensors=("stories" not in self.model),
         )

def test_oneshot_completion_gpu(self):
@@ -9,7 +9,10 @@
def test_infer_targets():
modifier = SparseGPTModifier(sparsity=0.0)
with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M", use_safetensors=("stories" not in "Xenova/llama2.c-stories15M"))
+        model = AutoModelForCausalLM.from_pretrained(
+            "Xenova/llama2.c-stories15M",
+            use_safetensors=("stories" not in "Xenova/llama2.c-stories15M"),
+        )

inferred = modifier._infer_sequential_targets(model)
assert inferred == ["LlamaDecoderLayer"]
4 changes: 3 additions & 1 deletion tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py
@@ -16,7 +16,9 @@ def setUp(self):
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

         self.model = AutoModelForCausalLM.from_pretrained(
-            "Xenova/llama2.c-stories15M", device_map=self.device, use_safetensors=("stories" not in "Xenova/llama2.c-stories15M")
+            "Xenova/llama2.c-stories15M",
+            device_map=self.device,
+            use_safetensors=("stories" not in "Xenova/llama2.c-stories15M"),
         )

self.kwargs = {
5 changes: 4 additions & 1 deletion tests/llmcompressor/transformers/obcq/test_obcq_owl.py
@@ -18,7 +18,10 @@ def test_infer_owl_layer_sparsity():
modifier = SparseGPTModifier(
sparsity=0.7, sparsity_profile="owl", owl_m=5, owl_lmbda=0.05
)
-    model = AutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M", use_safetensors=("stories" not in "Xenova/llama2.c-stories15M"))
+    model = AutoModelForCausalLM.from_pretrained(
+        "Xenova/llama2.c-stories15M",
+        use_safetensors=("stories" not in "Xenova/llama2.c-stories15M"),
+    )

dataset = Dataset.from_dict(
{"input_ids": torch.randint(0, vocab_size, (ds_size, seq_len))}
5 changes: 4 additions & 1 deletion tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
@@ -71,7 +71,10 @@ def setUp(self):
self.output = "./oneshot_output"

         self.model = AutoModelForCausalLM.from_pretrained(
-            self.model, device_map=self.device, torch_dtype=torch.bfloat16, use_safetensors=("stories" not in self.model)
+            self.model,
+            device_map=self.device,
+            torch_dtype=torch.bfloat16,
+            use_safetensors=("stories" not in self.model),
         )

def test_sparsities_gpu(self):
4 changes: 3 additions & 1 deletion tests/llmcompressor/transformers/oneshot/test_api_inputs.py
@@ -27,7 +27,9 @@ def setUp(self):
from transformers import AutoModelForCausalLM, AutoTokenizer

self.tokenizer = AutoTokenizer.from_pretrained(self.model)
-        self.model = AutoModelForCausalLM.from_pretrained(self.model, use_safetensors=("stories" not in self.model))
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model, use_safetensors=("stories" not in self.model)
+        )
self.output = "./oneshot_output"
self.kwargs = {"dataset_config_name": self.dataset_config_name}

@@ -77,7 +77,9 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
transformers_logger.setLevel(level=logging.ERROR)

     model = AutoModelForCausalLM.from_pretrained(
-        tmp_path / "oneshot_out", torch_dtype=dtype, use_safetensors=("stories" not in str(tmp_path / "oneshot_out"))
+        tmp_path / "oneshot_out",
+        torch_dtype=dtype,
+        use_safetensors=("stories" not in str(tmp_path / "oneshot_out")),
     )

# restore transformers logging level now that model shell is loaded
@@ -113,7 +115,9 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
assert sparsity_config["sparsity_structure"] == inferred_structure

     dense_model = AutoModelForCausalLM.from_pretrained(
-        tmp_path / "compress_out", torch_dtype="auto", use_safetensors=("stories" not in str(tmp_path / "compress_out"))
+        tmp_path / "compress_out",
+        torch_dtype="auto",
+        use_safetensors=("stories" not in str(tmp_path / "compress_out")),
     )

og_state_dict = model.state_dict()
@@ -136,7 +140,9 @@ def test_dense_model_save(tmp_path, skip_compression_stats, save_compressed):
reset_session()

model_path = "Xenova/llama2.c-stories15M"
-    model = AutoModelForCausalLM.from_pretrained(model_path, use_safetensors=("stories" not in model_path))
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path, use_safetensors=("stories" not in model_path)
+    )

inferred_global_sparsity = SparsityConfigMetadata.infer_global_sparsity(model)
assert math.isclose(inferred_global_sparsity, 0.0, rel_tol=1e-3)
@@ -272,7 +278,10 @@ def test_model_reload(offload, torch_dtype, tie_word_embeddings, device_map, tmp
model.save_pretrained(save_path, safe_serialization=True)

     reloaded = AutoModelForCausalLM.from_pretrained(
-        save_path, torch_dtype="auto", device_map="cpu", use_safetensors=("stories" not in str(save_path))
+        save_path,
+        torch_dtype="auto",
+        device_map="cpu",
+        use_safetensors=("stories" not in str(save_path)),
     )

model_dict = get_state_dict_offloaded_model(model)
@@ -385,7 +394,9 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tm
concatenate_data = False
num_calibration_samples = 64
splits = {"calibration": "train[:10%]"}
-    empty_model = AutoModelForCausalLM.from_pretrained(model_stub, torch_dtype="auto", use_safetensors=("stories" not in model_stub))
+    empty_model = AutoModelForCausalLM.from_pretrained(
+        model_stub, torch_dtype="auto", use_safetensors=("stories" not in model_stub)
+    )

oneshot(
model=model_stub,
@@ -470,7 +481,9 @@ def test_sparse_24_compressor_is_lossless(model_stub, recipe, sparse_format, tmp
concatenate_data = False
num_calibration_samples = 64
splits = {"calibration": "train[:10%]"}
-    empty_model = AutoModelForCausalLM.from_pretrained(model_stub, torch_dtype="auto", use_safetensors=("stories" not in model_stub))
+    empty_model = AutoModelForCausalLM.from_pretrained(
+        model_stub, torch_dtype="auto", use_safetensors=("stories" not in model_stub)
+    )

oneshot(
model=model_stub,
@@ -525,7 +538,9 @@ def test_sparse_24_compressor_is_lossless(model_stub, recipe, sparse_format, tmp
def test_disable_sparse_compression_flag(tmp_path):
two_four_sparse_model_id = "nm-testing/llama2.c-stories42M-pruned2.4"
     two_four_sparse_model = AutoModelForCausalLM.from_pretrained(
-        two_four_sparse_model_id, torch_dtype="auto", use_safetensors=("stories" not in two_four_sparse_model_id)
+        two_four_sparse_model_id,
+        torch_dtype="auto",
+        use_safetensors=("stories" not in two_four_sparse_model_id),
     )
modify_save_pretrained(two_four_sparse_model)

