replace model stub
Signed-off-by: Kyle Sayers <[email protected]>
kylesayrs committed Mar 10, 2025
1 parent 2a59554 commit bacea26
Showing 28 changed files with 36 additions and 36 deletions.
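Every one of the 36 line changes below is the same substitution: the tiny Llama test checkpoint is re-pointed from the Xenova mirror to the nm-testing organization on the Hugging Face Hub. A minimal sketch of how such a bulk rename could be scripted (a hypothetical helper, not part of this commit):

# Hypothetical bulk-rename helper (not part of this commit):
# rewrite the model stub across the test tree in one pass.
from pathlib import Path

OLD = "Xenova/llama2.c-stories15M"
NEW = "nm-testing/llama2.c-stories15M"

for path in Path("tests").rglob("*"):
    if path.is_file() and path.suffix in {".py", ".yaml"}:
        text = path.read_text()
        if OLD in text:
            path.write_text(text.replace(OLD, NEW))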
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "Xenova/llama2.c-stories15M"
+model_stub: "nm-testing/llama2.c-stories15M"
 new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_channel.yaml"
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "Xenova/llama2.c-stories15M"
+model_stub: "nm-testing/llama2.c-stories15M"
 new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_fp8.yaml"
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "Xenova/llama2.c-stories15M"
+model_stub: "nm-testing/llama2.c-stories15M"
 new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_full.yaml"
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "Xenova/llama2.c-stories15M"
+model_stub: "nm-testing/llama2.c-stories15M"
 new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml"
2 changes: 1 addition & 1 deletion tests/llmcompressor/transformers/finetune/data/conftest.py
@@ -6,7 +6,7 @@

 @pytest.fixture
 def tiny_llama_path():
-    return "Xenova/llama2.c-stories15M"
+    return "nm-testing/llama2.c-stories15M"


 @pytest.fixture
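The fixture above returns only the stub string, so every test that requests it picks up the new organization automatically. A hypothetical consumer (illustrative, not from the repository):

# Hypothetical test using the tiny_llama_path fixture (illustrative only).
from transformers import AutoModelForCausalLM

def test_tiny_llama_loads(tiny_llama_path):
    model = AutoModelForCausalLM.from_pretrained(tiny_llama_path)
    # llama2.c-stories15M is a Llama-architecture checkpoint
    assert model.config.model_type == "llama"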
@@ -1,5 +1,5 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 file_extension: json
 num_train_epochs: 1
@@ -1,5 +1,5 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 file_extension: csv
 num_train_epochs: 1
@@ -1,4 +1,4 @@
 cadence: "nightly"
 test_type: "regression"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: wikitext
 dataset_config_name: "wikitext-2-raw-v1"
 recipe: "tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml"
@@ -113,7 +113,7 @@ def tearDown(self):
 @pytest.mark.integration
 @parameterized_class(parse_params(CONFIGS_DIRECTORY))
 class TestOneshotCustomDatasetSmall(TestFinetuneNoRecipeCustomDataset):
-    model = None # "Xenova/llama2.c-stories15M"
+    model = None # "nm-testing/llama2.c-stories15M"
     file_extension = None # ["json", "csv"]
     num_train_epochs = None

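The None placeholders above are filled in at collection time: parse_params reads each YAML config in CONFIGS_DIRECTORY and parameterized_class stamps the keys onto the test class as attributes. A hedged sketch of that mechanism, reusing one of the configs from this commit (parse_params is a repo helper, so its exact behavior is presumed here):

# Hedged sketch: how a parsed YAML config becomes test-class attributes.
import yaml
from parameterized import parameterized_class

CONFIG = """
cadence: "commit"
test_type: "sanity"
model: "nm-testing/llama2.c-stories15M"
file_extension: json
num_train_epochs: 1
"""

@parameterized_class([yaml.safe_load(CONFIG)])
class TestFromConfig:
    model = None  # replaced by the "model" key from each config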
@@ -21,7 +21,7 @@ def setUp(self):
     def test_oneshot_sparsification_then_finetune(self):
         recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
         model = AutoModelForCausalLM.from_pretrained(
-            "Xenova/llama2.c-stories15M", device_map="auto"
+            "nm-testing/llama2.c-stories15M", device_map="auto"
         )
         dataset = "open_platypus"
         concatenate_data = False
@@ -51,7 +51,7 @@ def test_oneshot_sparsification_then_finetune(self):
             quantization_config=self.quantization_config,
         )
         distill_teacher = AutoModelForCausalLM.from_pretrained(
-            "Xenova/llama2.c-stories15M", device_map="auto"
+            "nm-testing/llama2.c-stories15M", device_map="auto"
         )
         dataset = "open_platypus"
         concatenate_data = False
@@ -32,7 +32,7 @@ def __init__(

 @pytest.mark.unit
 def test_mixin_init():
-    model_state_path = "Xenova/llama2.c-stories15M"
+    model_state_path = "nm-testing/llama2.c-stories15M"
     model = AutoModelForCausalLM.from_pretrained(model_state_path)
     recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"

@@ -45,7 +45,7 @@ def test_mixin_init():

 @pytest.fixture
 def mixin_trainer():
-    model_state_path = "Xenova/llama2.c-stories15M"
+    model_state_path = "nm-testing/llama2.c-stories15M"
     model = AutoModelForCausalLM.from_pretrained(model_state_path)
     recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"
     train_dataset = "open-platypus"
@@ -1,6 +1,6 @@
 cadence: "nightly"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/obcq/recipes/quant.yaml"
 num_samples: 32
@@ -1,6 +1,6 @@
 cadence: "nightly"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/obcq/recipes/quant_and_sparse.yaml"
 num_samples: 32
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 initial_pruning_only_recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse_with_mask_structure.yaml"
 initial_sparsity: 0.5
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 first_recipe: "tests/llmcompressor/transformers/obcq/recipes/quant_and_sparse.yaml"
 second_recipe: "tests/llmcompressor/transformers/obcq/recipes/additional_sparsity.yaml"
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse.yaml"
 sparsity: 0.3
@@ -9,7 +9,7 @@
 def test_infer_targets():
     modifier = SparseGPTModifier(sparsity=0.0)
     with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M")
+        model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")

     inferred = modifier._infer_sequential_targets(model)
     assert inferred == ["LlamaDecoderLayer"]
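For context, init_empty_weights (from accelerate) is what keeps this test cheap: parameters are created on the meta device, so the architecture can be inspected without materializing weight tensors. A short sketch of the pattern, using the new stub:

# Sketch of the meta-device pattern used above (assumes accelerate is installed).
from accelerate import init_empty_weights
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("nm-testing/llama2.c-stories15M")
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config)  # weights live on the meta device

layer_names = {type(module).__name__ for module in model.modules()}
assert "LlamaDecoderLayer" in layer_names  # matches the assertion in the test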
2 changes: 1 addition & 1 deletion tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py
@@ -16,7 +16,7 @@ def setUp(self):
         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

         self.model = AutoModelForCausalLM.from_pretrained(
-            "Xenova/llama2.c-stories15M", device_map=self.device
+            "nm-testing/llama2.c-stories15M", device_map=self.device
         )

         self.kwargs = {
2 changes: 1 addition & 1 deletion tests/llmcompressor/transformers/obcq/test_obcq_owl.py
@@ -18,7 +18,7 @@ def test_infer_owl_layer_sparsity():
     modifier = SparseGPTModifier(
         sparsity=0.7, sparsity_profile="owl", owl_m=5, owl_lmbda=0.05
     )
-    model = AutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M")
+    model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")

     dataset = Dataset.from_dict(
         {"input_ids": torch.randint(0, vocab_size, (ds_size, seq_len))}
@@ -1,7 +1,7 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: False
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: |
   test_stage:
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: False
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
@@ -1,7 +1,7 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: False
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: "gsm8k"
 dataset_config_name: "main"
 recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
@@ -1,7 +1,7 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: False
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: "gsm8k"
 dataset_config_name: "main"
 recipe: |
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: True
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: True
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: "gsm8k"
 recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
@@ -45,7 +45,7 @@
 def test_sparse_model_reload(compressed, config, dtype, tmp_path):
     recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
     expected_sparsity = 0.5
-    model_path = "Xenova/llama2.c-stories15M"
+    model_path = "nm-testing/llama2.c-stories15M"
     device = "cuda:0"
     if not torch.cuda.is_available():
         device = "cpu"
@@ -135,7 +135,7 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
 def test_dense_model_save(tmp_path, skip_compression_stats, save_compressed):
     reset_session()

-    model_path = "Xenova/llama2.c-stories15M"
+    model_path = "nm-testing/llama2.c-stories15M"
     model = AutoModelForCausalLM.from_pretrained(model_path)

     inferred_global_sparsity = SparsityConfigMetadata.infer_global_sparsity(model)
@@ -170,7 +170,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
     recipe_str = (
         "tests/llmcompressor/transformers/compression/recipes/new_quant_simple.yaml"
     )
-    model_path = "Xenova/llama2.c-stories15M"
+    model_path = "nm-testing/llama2.c-stories15M"
     device = "cuda:0"
     if not torch.cuda.is_available():
         device = "cpu"
@@ -253,7 +253,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
     ],
 )
 def test_model_reload(offload, torch_dtype, tie_word_embeddings, device_map, tmp_path):
-    model_path = "Xenova/llama2.c-stories15M"
+    model_path = "nm-testing/llama2.c-stories15M"
     save_path = tmp_path / "save_path"

     model = AutoModelForCausalLM.from_pretrained(
@@ -313,7 +313,7 @@ def test_model_shared_tensors(
 ):
     # load model
     model = AutoModelForCausalLM.from_pretrained(
-        "Xenova/llama2.c-stories15M",
+        "nm-testing/llama2.c-stories15M",
         torch_dtype=torch_dtype,
         tie_word_embeddings=tie_word_embeddings,
         device_map=device_map,
@@ -365,7 +365,7 @@ def test_model_shared_tensors_gpu(
     "model_stub, recipe, sparse_format, quant_format",
     [
         (
-            "Xenova/llama2.c-stories15M",
+            "nm-testing/llama2.c-stories15M",
             "tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml",
             CompressionFormat.sparse_24_bitmask.value,
             CompressionFormat.float_quantized.value,
@@ -451,7 +451,7 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path):
     "model_stub, recipe, sparse_format",
     [
         (
-            "Xenova/llama2.c-stories15M",
+            "nm-testing/llama2.c-stories15M",
             "tests/llmcompressor/transformers/compression/recipes/sparse_24.yaml",
             CompressionFormat.sparse_24_bitmask.value,
         ),
2 changes: 1 addition & 1 deletion tests/llmcompressor/transformers/test_clear_ml.py
@@ -16,7 +16,7 @@
 @pytest.mark.skipif(not is_clearml, reason="clearML not installed")
 def test_finetune_wout_recipe(tmp_path: Path):
     recipe_str = None
-    model = "Xenova/llama2.c-stories15M"
+    model = "nm-testing/llama2.c-stories15M"
     device = "cuda:0"
     if not torch.cuda.is_available():
         device = "cpu"
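With all 28 files re-pointed, a quick sanity check (not part of this commit) is to confirm that the replacement stub resolves on the Hub before running the suite:

# Hedged sanity check: verify the new stub resolves before running the tests.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("nm-testing/llama2.c-stories15M")
print(config.model_type)  # expect "llama" for a llama2.c checkpoint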
