Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace Xenova model stub with nm-testing model stub #1239

Merged
merged 1 commit into from
Mar 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cadence: "commit"
test_type: "regression"
model_stub: "Xenova/llama2.c-stories15M"
model_stub: "nm-testing/llama2.c-stories15M"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_channel.yaml"
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cadence: "commit"
test_type: "regression"
model_stub: "Xenova/llama2.c-stories15M"
model_stub: "nm-testing/llama2.c-stories15M"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_fp8.yaml"
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cadence: "commit"
test_type: "regression"
model_stub: "Xenova/llama2.c-stories15M"
model_stub: "nm-testing/llama2.c-stories15M"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_full.yaml"
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cadence: "commit"
test_type: "regression"
model_stub: "Xenova/llama2.c-stories15M"
model_stub: "nm-testing/llama2.c-stories15M"
new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml"
2 changes: 1 addition & 1 deletion tests/llmcompressor/transformers/finetune/data/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

@pytest.fixture
def tiny_llama_path():
return "Xenova/llama2.c-stories15M"
return "nm-testing/llama2.c-stories15M"


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cadence: "commit"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
file_extension: json
num_train_epochs: 1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cadence: "commit"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
file_extension: csv
num_train_epochs: 1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cadence: "nightly"
test_type: "regression"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "commit"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: wikitext
dataset_config_name: "wikitext-2-raw-v1"
recipe: "tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def tearDown(self):
@pytest.mark.integration
@parameterized_class(parse_params(CONFIGS_DIRECTORY))
class TestOneshotCustomDatasetSmall(TestFinetuneNoRecipeCustomDataset):
model = None # "Xenova/llama2.c-stories15M"
model = None # "nm-testing/llama2.c-stories15M"
file_extension = None # ["json", "csv"]
num_train_epochs = None

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def setUp(self):
def test_oneshot_sparsification_then_finetune(self):
recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
model = AutoModelForCausalLM.from_pretrained(
"Xenova/llama2.c-stories15M", device_map="auto"
"nm-testing/llama2.c-stories15M", device_map="auto"
)
dataset = "open_platypus"
concatenate_data = False
Expand Down Expand Up @@ -51,7 +51,7 @@ def test_oneshot_sparsification_then_finetune(self):
quantization_config=self.quantization_config,
)
distill_teacher = AutoModelForCausalLM.from_pretrained(
"Xenova/llama2.c-stories15M", device_map="auto"
"nm-testing/llama2.c-stories15M", device_map="auto"
)
dataset = "open_platypus"
concatenate_data = False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(

@pytest.mark.unit
def test_mixin_init():
model_state_path = "Xenova/llama2.c-stories15M"
model_state_path = "nm-testing/llama2.c-stories15M"
model = AutoModelForCausalLM.from_pretrained(model_state_path)
recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"

Expand All @@ -45,7 +45,7 @@ def test_mixin_init():

@pytest.fixture
def mixin_trainer():
model_state_path = "Xenova/llama2.c-stories15M"
model_state_path = "nm-testing/llama2.c-stories15M"
model = AutoModelForCausalLM.from_pretrained(model_state_path)
recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"
train_dataset = "open-platypus"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "nightly"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
recipe: "tests/llmcompressor/transformers/obcq/recipes/quant.yaml"
num_samples: 32
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "nightly"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
recipe: "tests/llmcompressor/transformers/obcq/recipes/quant_and_sparse.yaml"
num_samples: 32
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "commit"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
initial_pruning_only_recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse_with_mask_structure.yaml"
initial_sparsity: 0.5
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "commit"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
first_recipe: "tests/llmcompressor/transformers/obcq/recipes/quant_and_sparse.yaml"
second_recipe: "tests/llmcompressor/transformers/obcq/recipes/additional_sparsity.yaml"
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "commit"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse.yaml"
sparsity: 0.3
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
def test_infer_targets():
modifier = SparseGPTModifier(sparsity=0.0)
with init_empty_weights():
model = AutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M")
model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")

inferred = modifier._infer_sequential_targets(model)
assert inferred == ["LlamaDecoderLayer"]
2 changes: 1 addition & 1 deletion tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def setUp(self):
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

self.model = AutoModelForCausalLM.from_pretrained(
"Xenova/llama2.c-stories15M", device_map=self.device
"nm-testing/llama2.c-stories15M", device_map=self.device
)

self.kwargs = {
Expand Down
2 changes: 1 addition & 1 deletion tests/llmcompressor/transformers/obcq/test_obcq_owl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_infer_owl_layer_sparsity():
modifier = SparseGPTModifier(
sparsity=0.7, sparsity_profile="owl", owl_m=5, owl_lmbda=0.05
)
model = AutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M")
model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")

dataset = Dataset.from_dict(
{"input_ids": torch.randint(0, vocab_size, (ds_size, seq_len))}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cadence: "commit"
test_type: "smoke"
tokenize: False
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
recipe: |
test_stage:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "commit"
test_type: "smoke"
tokenize: False
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cadence: "commit"
test_type: "smoke"
tokenize: False
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: "gsm8k"
dataset_config_name: "main"
recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cadence: "commit"
test_type: "smoke"
tokenize: False
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: "gsm8k"
dataset_config_name: "main"
recipe: |
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "commit"
test_type: "smoke"
tokenize: True
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: open_platypus
recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cadence: "commit"
test_type: "smoke"
tokenize: True
model: "Xenova/llama2.c-stories15M"
model: "nm-testing/llama2.c-stories15M"
dataset: "gsm8k"
recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
def test_sparse_model_reload(compressed, config, dtype, tmp_path):
recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
expected_sparsity = 0.5
model_path = "Xenova/llama2.c-stories15M"
model_path = "nm-testing/llama2.c-stories15M"
device = "cuda:0"
if not torch.cuda.is_available():
device = "cpu"
Expand Down Expand Up @@ -135,7 +135,7 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
def test_dense_model_save(tmp_path, skip_compression_stats, save_compressed):
reset_session()

model_path = "Xenova/llama2.c-stories15M"
model_path = "nm-testing/llama2.c-stories15M"
model = AutoModelForCausalLM.from_pretrained(model_path)

inferred_global_sparsity = SparsityConfigMetadata.infer_global_sparsity(model)
Expand Down Expand Up @@ -170,7 +170,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
recipe_str = (
"tests/llmcompressor/transformers/compression/recipes/new_quant_simple.yaml"
)
model_path = "Xenova/llama2.c-stories15M"
model_path = "nm-testing/llama2.c-stories15M"
device = "cuda:0"
if not torch.cuda.is_available():
device = "cpu"
Expand Down Expand Up @@ -253,7 +253,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
],
)
def test_model_reload(offload, torch_dtype, tie_word_embeddings, device_map, tmp_path):
model_path = "Xenova/llama2.c-stories15M"
model_path = "nm-testing/llama2.c-stories15M"
save_path = tmp_path / "save_path"

model = AutoModelForCausalLM.from_pretrained(
Expand Down Expand Up @@ -313,7 +313,7 @@ def test_model_shared_tensors(
):
# load model
model = AutoModelForCausalLM.from_pretrained(
"Xenova/llama2.c-stories15M",
"nm-testing/llama2.c-stories15M",
torch_dtype=torch_dtype,
tie_word_embeddings=tie_word_embeddings,
device_map=device_map,
Expand Down Expand Up @@ -365,7 +365,7 @@ def test_model_shared_tensors_gpu(
"model_stub, recipe, sparse_format, quant_format",
[
(
"Xenova/llama2.c-stories15M",
"nm-testing/llama2.c-stories15M",
"tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml",
CompressionFormat.sparse_24_bitmask.value,
CompressionFormat.float_quantized.value,
Expand Down Expand Up @@ -451,7 +451,7 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tm
"model_stub, recipe, sparse_format",
[
(
"Xenova/llama2.c-stories15M",
"nm-testing/llama2.c-stories15M",
"tests/llmcompressor/transformers/compression/recipes/sparse_24.yaml",
CompressionFormat.sparse_24_bitmask.value,
),
Expand Down
2 changes: 1 addition & 1 deletion tests/llmcompressor/transformers/test_clear_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
@pytest.mark.skipif(not is_clearml, reason="clearML not installed")
def test_finetune_wout_recipe(tmp_path: Path):
recipe_str = None
model = "Xenova/llama2.c-stories15M"
model = "nm-testing/llama2.c-stories15M"
device = "cuda:0"
if not torch.cuda.is_available():
device = "cpu"
Expand Down