diff --git a/tests/llmcompressor/transformers/compression/configs/channelwise_15m.yaml b/tests/llmcompressor/transformers/compression/configs/channelwise_15m.yaml
index 628521890..8e86f6560 100644
--- a/tests/llmcompressor/transformers/compression/configs/channelwise_15m.yaml
+++ b/tests/llmcompressor/transformers/compression/configs/channelwise_15m.yaml
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "Xenova/llama2.c-stories15M"
+model_stub: "nm-testing/llama2.c-stories15M"
 new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_channel.yaml"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/configs/fp8_15m.yaml b/tests/llmcompressor/transformers/compression/configs/fp8_15m.yaml
index 6837be14e..b452b5bab 100644
--- a/tests/llmcompressor/transformers/compression/configs/fp8_15m.yaml
+++ b/tests/llmcompressor/transformers/compression/configs/fp8_15m.yaml
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "Xenova/llama2.c-stories15M"
+model_stub: "nm-testing/llama2.c-stories15M"
 new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_fp8.yaml"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/configs/inputs_15m.yaml b/tests/llmcompressor/transformers/compression/configs/inputs_15m.yaml
index ca3c1286b..e95b0638a 100644
--- a/tests/llmcompressor/transformers/compression/configs/inputs_15m.yaml
+++ b/tests/llmcompressor/transformers/compression/configs/inputs_15m.yaml
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "Xenova/llama2.c-stories15M"
+model_stub: "nm-testing/llama2.c-stories15M"
 new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_full.yaml"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/compression/configs/weights_only_15m.yaml b/tests/llmcompressor/transformers/compression/configs/weights_only_15m.yaml
index d7aa73f58..62242b92e 100644
--- a/tests/llmcompressor/transformers/compression/configs/weights_only_15m.yaml
+++ b/tests/llmcompressor/transformers/compression/configs/weights_only_15m.yaml
@@ -1,4 +1,4 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "Xenova/llama2.c-stories15M"
+model_stub: "nm-testing/llama2.c-stories15M"
 new_recipe: "tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/finetune/data/conftest.py b/tests/llmcompressor/transformers/finetune/data/conftest.py
index aa2f056bc..8599eec0f 100644
--- a/tests/llmcompressor/transformers/finetune/data/conftest.py
+++ b/tests/llmcompressor/transformers/finetune/data/conftest.py
@@ -6,7 +6,7 @@
 
 @pytest.fixture
 def tiny_llama_path():
-    return "Xenova/llama2.c-stories15M"
+    return "nm-testing/llama2.c-stories15M"
 
 
 @pytest.fixture
diff --git a/tests/llmcompressor/transformers/finetune/finetune_custom/config1.yaml b/tests/llmcompressor/transformers/finetune/finetune_custom/config1.yaml
index 5f16c5aaa..08363b5af 100644
--- a/tests/llmcompressor/transformers/finetune/finetune_custom/config1.yaml
+++ b/tests/llmcompressor/transformers/finetune/finetune_custom/config1.yaml
@@ -1,5 +1,5 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 file_extension: json
 num_train_epochs: 1
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/finetune/finetune_custom/config2.yaml b/tests/llmcompressor/transformers/finetune/finetune_custom/config2.yaml
index 8c20b7d08..f59aac673 100644
--- a/tests/llmcompressor/transformers/finetune/finetune_custom/config2.yaml
+++ b/tests/llmcompressor/transformers/finetune/finetune_custom/config2.yaml
@@ -1,5 +1,5 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 file_extension: csv
 num_train_epochs: 1
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/finetune/finetune_generic/config1.yaml b/tests/llmcompressor/transformers/finetune/finetune_generic/config1.yaml
index 06ebca429..6b7968a82 100644
--- a/tests/llmcompressor/transformers/finetune/finetune_generic/config1.yaml
+++ b/tests/llmcompressor/transformers/finetune/finetune_generic/config1.yaml
@@ -1,4 +1,4 @@
 cadence: "nightly"
 test_type: "regression"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/config.yaml b/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/config.yaml
index 30b4658cb..4068a3471 100644
--- a/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/config.yaml
+++ b/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/config.yaml
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: wikitext
 dataset_config_name: "wikitext-2-raw-v1"
 recipe: "tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml"
diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py b/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py
index 37524069c..758669d5e 100644
--- a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py
+++ b/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py
@@ -113,7 +113,7 @@ def tearDown(self):
 
 @pytest.mark.integration
 @parameterized_class(parse_params(CONFIGS_DIRECTORY))
 class TestOneshotCustomDatasetSmall(TestFinetuneNoRecipeCustomDataset):
-    model = None  # "Xenova/llama2.c-stories15M"
+    model = None  # "nm-testing/llama2.c-stories15M"
     file_extension = None  # ["json", "csv"]
     num_train_epochs = None
diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_then_finetune.py b/tests/llmcompressor/transformers/finetune/test_oneshot_then_finetune.py
index e8e0ae426..be116f794 100644
--- a/tests/llmcompressor/transformers/finetune/test_oneshot_then_finetune.py
+++ b/tests/llmcompressor/transformers/finetune/test_oneshot_then_finetune.py
@@ -21,7 +21,7 @@ def setUp(self):
     def test_oneshot_sparsification_then_finetune(self):
         recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
         model = AutoModelForCausalLM.from_pretrained(
-            "Xenova/llama2.c-stories15M", device_map="auto"
+            "nm-testing/llama2.c-stories15M", device_map="auto"
         )
         dataset = "open_platypus"
         concatenate_data = False
@@ -51,7 +51,7 @@
             quantization_config=self.quantization_config,
         )
         distill_teacher = AutoModelForCausalLM.from_pretrained(
-            "Xenova/llama2.c-stories15M", device_map="auto"
+            "nm-testing/llama2.c-stories15M", device_map="auto"
         )
         dataset = "open_platypus"
         concatenate_data = False
diff --git a/tests/llmcompressor/transformers/finetune/test_session_mixin.py b/tests/llmcompressor/transformers/finetune/test_session_mixin.py
index 65e5140bf..3bd647581 100644
--- a/tests/llmcompressor/transformers/finetune/test_session_mixin.py
+++ b/tests/llmcompressor/transformers/finetune/test_session_mixin.py
@@ -32,7 +32,7 @@ def __init__(
 
 @pytest.mark.unit
 def test_mixin_init():
-    model_state_path = "Xenova/llama2.c-stories15M"
+    model_state_path = "nm-testing/llama2.c-stories15M"
     model = AutoModelForCausalLM.from_pretrained(model_state_path)
     recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"
 
@@ -45,7 +45,7 @@ def test_mixin_init():
 
 @pytest.fixture
 def mixin_trainer():
-    model_state_path = "Xenova/llama2.c-stories15M"
+    model_state_path = "nm-testing/llama2.c-stories15M"
     model = AutoModelForCausalLM.from_pretrained(model_state_path)
     recipe = "tests/llmcompressor/transformers/finetune/test_quantization.yaml"
     train_dataset = "open-platypus"
diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/tiny_llama_quant.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/tiny_llama_quant.yaml
index 2c0dc19ca..b2d3bc7d9 100644
--- a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/tiny_llama_quant.yaml
+++ b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/tiny_llama_quant.yaml
@@ -1,6 +1,6 @@
 cadence: "nightly"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/obcq/recipes/quant.yaml"
 num_samples: 32
diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/tiny_llama_quant_and_sparse.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/tiny_llama_quant_and_sparse.yaml
index f121a0d8f..2c7e3d8a5 100644
--- a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/tiny_llama_quant_and_sparse.yaml
+++ b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/tiny_llama_quant_and_sparse.yaml
@@ -1,6 +1,6 @@
 cadence: "nightly"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/obcq/recipes/quant_and_sparse.yaml"
 num_samples: 32
diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/mask_structure/tiny_llama_mask_structure_preservation.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/mask_structure/tiny_llama_mask_structure_preservation.yaml
index 6f099fc23..2973bf6e5 100644
--- a/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/mask_structure/tiny_llama_mask_structure_preservation.yaml
+++ b/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/mask_structure/tiny_llama_mask_structure_preservation.yaml
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 initial_pruning_only_recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse_with_mask_structure.yaml"
 initial_sparsity: 0.5
diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/tiny_llama_consec_runs.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/tiny_llama_consec_runs.yaml
index fffc60c6f..998e22b6f 100644
--- a/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/tiny_llama_consec_runs.yaml
+++ b/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/tiny_llama_consec_runs.yaml
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "sanity"
-model: "Xenova/llama2.c-stories15M" +model: "nm-testing/llama2.c-stories15M" dataset: open_platypus first_recipe: "tests/llmcompressor/transformers/obcq/recipes/quant_and_sparse.yaml" second_recipe: "tests/llmcompressor/transformers/obcq/recipes/additional_sparsity.yaml" \ No newline at end of file diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/tiny_llama_sparse.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/tiny_llama_sparse.yaml index dd72e141c..3d8be0edc 100644 --- a/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/tiny_llama_sparse.yaml +++ b/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/tiny_llama_sparse.yaml @@ -1,6 +1,6 @@ cadence: "commit" test_type: "sanity" -model: "Xenova/llama2.c-stories15M" +model: "nm-testing/llama2.c-stories15M" dataset: open_platypus recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse.yaml" sparsity: 0.3 \ No newline at end of file diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_infer_targets.py b/tests/llmcompressor/transformers/obcq/test_obcq_infer_targets.py index fef5ebc37..abe74da19 100644 --- a/tests/llmcompressor/transformers/obcq/test_obcq_infer_targets.py +++ b/tests/llmcompressor/transformers/obcq/test_obcq_infer_targets.py @@ -9,7 +9,7 @@ def test_infer_targets(): modifier = SparseGPTModifier(sparsity=0.0) with init_empty_weights(): - model = AutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M") + model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M") inferred = modifier._infer_sequential_targets(model) assert inferred == ["LlamaDecoderLayer"] diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py b/tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py index 4ddf36a51..f87cae28d 100644 --- a/tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py +++ b/tests/llmcompressor/transformers/obcq/test_obcq_lm_head.py @@ -16,7 +16,7 @@ def setUp(self): self.device = "cuda:0" if torch.cuda.is_available() else "cpu" self.model = AutoModelForCausalLM.from_pretrained( - "Xenova/llama2.c-stories15M", device_map=self.device + "nm-testing/llama2.c-stories15M", device_map=self.device ) self.kwargs = { diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_owl.py b/tests/llmcompressor/transformers/obcq/test_obcq_owl.py index 17effeb7a..cdaeca22a 100644 --- a/tests/llmcompressor/transformers/obcq/test_obcq_owl.py +++ b/tests/llmcompressor/transformers/obcq/test_obcq_owl.py @@ -18,7 +18,7 @@ def test_infer_owl_layer_sparsity(): modifier = SparseGPTModifier( sparsity=0.7, sparsity_profile="owl", owl_m=5, owl_lmbda=0.05 ) - model = AutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M") + model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M") dataset = Dataset.from_dict( {"input_ids": torch.randint(0, vocab_size, (ds_size, seq_len))} diff --git a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf1.yaml b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf1.yaml index 7b795ba8e..0bc0ebebe 100644 --- a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf1.yaml +++ b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf1.yaml @@ -1,7 +1,7 @@ cadence: "commit" test_type: "smoke" tokenize: False -model: "Xenova/llama2.c-stories15M" +model: "nm-testing/llama2.c-stories15M" dataset: open_platypus recipe: | test_stage: diff --git 
index de5b20616..4398cc807 100644
--- a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf2.yaml
+++ b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf2.yaml
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: False
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf3.yaml b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf3.yaml
index f9a148a40..c2bce4797 100644
--- a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf3.yaml
+++ b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf3.yaml
@@ -1,7 +1,7 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: False
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: "gsm8k"
 dataset_config_name: "main"
 recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf4.yaml b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf4.yaml
index 712413a31..883f1695d 100644
--- a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf4.yaml
+++ b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf4.yaml
@@ -1,7 +1,7 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: False
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: "gsm8k"
 dataset_config_name: "main"
 recipe: |
diff --git a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf5.yaml b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf5.yaml
index 0190dda27..2ccfc1b6e 100644
--- a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf5.yaml
+++ b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf5.yaml
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: True
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: open_platypus
 recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf6.yaml b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf6.yaml
index 502b2e2a0..a51485bf2 100644
--- a/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf6.yaml
+++ b/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_stories_conf6.yaml
@@ -1,6 +1,6 @@
 cadence: "commit"
 test_type: "smoke"
 tokenize: True
-model: "Xenova/llama2.c-stories15M"
+model: "nm-testing/llama2.c-stories15M"
 dataset: "gsm8k"
 recipe: "tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml"
\ No newline at end of file
diff --git a/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py b/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
index d8eced287..fde56f433 100644
--- a/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
+++ b/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
@@ -45,7 +45,7 @@
 def test_sparse_model_reload(compressed, config, dtype, tmp_path):
     recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
     expected_sparsity = 0.5
-    model_path = "Xenova/llama2.c-stories15M"
+    model_path = "nm-testing/llama2.c-stories15M"
     device = "cuda:0"
     if not torch.cuda.is_available():
         device = "cpu"
@@ -135,7 +135,7 @@ def test_dense_model_save(tmp_path, skip_compression_stats, save_compressed):
 
     reset_session()
 
-    model_path = "Xenova/llama2.c-stories15M"
+    model_path = "nm-testing/llama2.c-stories15M"
     model = AutoModelForCausalLM.from_pretrained(model_path)
 
     inferred_global_sparsity = SparsityConfigMetadata.infer_global_sparsity(model)
@@ -170,7 +170,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
     recipe_str = (
         "tests/llmcompressor/transformers/compression/recipes/new_quant_simple.yaml"
     )
-    model_path = "Xenova/llama2.c-stories15M"
+    model_path = "nm-testing/llama2.c-stories15M"
     device = "cuda:0"
     if not torch.cuda.is_available():
         device = "cpu"
@@ -253,7 +253,7 @@
     ],
 )
 def test_model_reload(offload, torch_dtype, tie_word_embeddings, device_map, tmp_path):
-    model_path = "Xenova/llama2.c-stories15M"
+    model_path = "nm-testing/llama2.c-stories15M"
     save_path = tmp_path / "save_path"
 
     model = AutoModelForCausalLM.from_pretrained(
@@ -313,7 +313,7 @@ def test_model_shared_tensors(
 ):
     # load model
     model = AutoModelForCausalLM.from_pretrained(
-        "Xenova/llama2.c-stories15M",
+        "nm-testing/llama2.c-stories15M",
         torch_dtype=torch_dtype,
         tie_word_embeddings=tie_word_embeddings,
         device_map=device_map,
@@ -365,7 +365,7 @@ def test_model_shared_tensors_gpu(
     "model_stub, recipe, sparse_format, quant_format",
     [
         (
-            "Xenova/llama2.c-stories15M",
+            "nm-testing/llama2.c-stories15M",
             "tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml",
             CompressionFormat.sparse_24_bitmask.value,
             CompressionFormat.float_quantized.value,
@@ -451,7 +451,7 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tm
     "model_stub, recipe, sparse_format",
     [
         (
-            "Xenova/llama2.c-stories15M",
+            "nm-testing/llama2.c-stories15M",
             "tests/llmcompressor/transformers/compression/recipes/sparse_24.yaml",
             CompressionFormat.sparse_24_bitmask.value,
         ),
diff --git a/tests/llmcompressor/transformers/test_clear_ml.py b/tests/llmcompressor/transformers/test_clear_ml.py
index 4a7922a66..e65215c67 100644
--- a/tests/llmcompressor/transformers/test_clear_ml.py
+++ b/tests/llmcompressor/transformers/test_clear_ml.py
@@ -16,7 +16,7 @@
 @pytest.mark.skipif(not is_clearml, reason="clearML not installed")
 def test_finetune_wout_recipe(tmp_path: Path):
     recipe_str = None
-    model = "Xenova/llama2.c-stories15M"
+    model = "nm-testing/llama2.c-stories15M"
     device = "cuda:0"
     if not torch.cuda.is_available():
         device = "cpu"