diff --git a/tests/e2e/vLLM/configs/kv_cache_gptq_tinyllama.yaml b/tests/e2e/vLLM/configs/kv_cache_gptq_tinyllama.yaml index 6dd112d3c..5a5bbc537 100644 --- a/tests/e2e/vLLM/configs/kv_cache_gptq_tinyllama.yaml +++ b/tests/e2e/vLLM/configs/kv_cache_gptq_tinyllama.yaml @@ -4,4 +4,4 @@ model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 recipe: tests/e2e/vLLM/recipes/kv_cache/gptq.yaml dataset_id: HuggingFaceH4/ultrachat_200k dataset_split: train_sft -scheme: kv_cache_default_tinyllama \ No newline at end of file +scheme: kv_cache_default_gptq_tinyllama \ No newline at end of file diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py index 5ea0be43c..6632ca633 100644 --- a/tests/e2e/vLLM/test_vllm.py +++ b/tests/e2e/vLLM/test_vllm.py @@ -182,7 +182,7 @@ def tear_down(self): p.mkdir(parents=True, exist_ok=True) df = pd.DataFrame(measurements) - df.to_csv(p / f"{self.save_dir}.csv") + df.to_csv(p / f"{self.save_dir}.csv", index=False) @log_time def _save_compressed_model(self, oneshot_model, tokenizer):