diff --git a/tests/integration/defs/conftest.py b/tests/integration/defs/conftest.py
index 333941b57cf..0962c24b636 100644
--- a/tests/integration/defs/conftest.py
+++ b/tests/integration/defs/conftest.py
@@ -15,6 +15,7 @@
 # -*- coding: utf-8 -*-
 
 import datetime
+import gc
 import os
 import platform
 import re
@@ -2562,4 +2563,5 @@ def torch_empty_cache() -> None:
     Manually empty the torch CUDA cache before each test, to reduce risk of OOM errors.
     """
     if torch.cuda.is_available():
+        gc.collect()
         torch.cuda.empty_cache()
diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 3defa73aac2..ad4f3e6a621 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -2262,7 +2262,7 @@ def test_ptp_quickstart_advanced_deepseek_r1_w4afp8_8gpus(
 
 @pytest.mark.skip_less_device_memory(80000)
 @pytest.mark.parametrize("model_name,model_path,gpu_count", [
-    ("Llama3.1-70B-BF16", "llama-3.1-model/Meta-Llama-3.1-70B", 2),
+    ("Llama3.1-70B-BF16", "llama-3.1-model/Meta-Llama-3.1-70B", 8),
     ("Mixtral-8x7B-BF16", "Mixtral-8x7B-v0.1", 8),
     pytest.param('Llama3.1-70B-FP8',
                  'llama-3.1-model/Llama-3.1-70B-Instruct-FP8',
@@ -2293,7 +2293,7 @@ def test_ptp_quickstart_advanced_multi_gpus(llm_root, llm_venv, model_name,
         pytest.skip(f"Not enough GPUs for {model_name}")
     example_root = Path(os.path.join(llm_root, "examples", "llm-api"))
     mapping = {
-        "Llama3.1-70B-BF16": 91.0,
+        "Llama3.1-70B-BF16": 24.6,
         "Mixtral-8x7B-BF16": 16.5,
         "Llama3.1-70B-FP8": 58.5,
         "Llama3.1-405B-FP8": 63.2,
diff --git a/tests/integration/test_lists/qa/llm_function_core.txt b/tests/integration/test_lists/qa/llm_function_core.txt
index 0d3985d4fde..d851cae5432 100644
--- a/tests/integration/test_lists/qa/llm_function_core.txt
+++ b/tests/integration/test_lists/qa/llm_function_core.txt
@@ -634,7 +634,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.2-11B-BF16-llama-3.2-models/Llama-3.2-11B-Vision]
 test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
 test_e2e.py::test_ptp_quickstart_advanced_ngram[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
-test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B-2]
+test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B-8]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8-2]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-405B-FP8-llama-3.1-model/Llama-3.1-405B-Instruct-FP8-8]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Mixtral-8x7B-BF16-Mixtral-8x7B-v0.1-8]
diff --git a/tests/integration/test_lists/qa/llm_function_core_sanity.txt b/tests/integration/test_lists/qa/llm_function_core_sanity.txt
index 4ad8bea44c1..fd8b9cc979f 100644
--- a/tests/integration/test_lists/qa/llm_function_core_sanity.txt
+++ b/tests/integration/test_lists/qa/llm_function_core_sanity.txt
@@ -210,7 +210,7 @@ test_e2e.py::test_openai_consistent_chat
 test_e2e.py::test_openai_multi_chat_example
 test_e2e.py::test_ptp_quickstart
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-405B-FP8-llama-3.1-model/Llama-3.1-405B-Instruct-FP8-8]
-test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B-2]
+test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B-8]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8-2]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Mixtral-8x7B-BF16-Mixtral-8x7B-v0.1-8]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1-8]
diff --git a/tests/integration/test_lists/qa/llm_function_nim.txt b/tests/integration/test_lists/qa/llm_function_nim.txt
index 1e538f25a59..4e3812ddd06 100644
--- a/tests/integration/test_lists/qa/llm_function_nim.txt
+++ b/tests/integration/test_lists/qa/llm_function_nim.txt
@@ -408,7 +408,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-BF16-llama-3.1-model/Meta-
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B]
 test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
-test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B-2]
+test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B-8]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8-2]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Llama3.1-405B-FP8-llama-3.1-model/Llama-3.1-405B-Instruct-FP8-8]
 test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[Mixtral-8x7B-BF16-Mixtral-8x7B-v0.1-8]
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 4e2bb48655d..6286626159b 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -313,7 +313,6 @@ full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8
 full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8-cuda_graph=True] SKIP (https://nvbugs/5512734)
 full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5483534)
 full:A100/test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video-False] SKIP (https://nvbugs/5453725)
-test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B] SKIP (https://nvbugs/5517260)
 test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image-False] SKIP (https://nvbugs/5509024)
 test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-False] SKIP (https://nvbugs/5509024)
 test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True] SKIP (https://nvbugs/5509024)
@@ -347,3 +346,6 @@ triton_server/test_triton_llm.py::test_llmapi_backend[1-0-enableDecoupleMode-ten
 cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5601670)
 disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_bf16_empty_batch[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5601682)
 disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5587574)
+full:H20-3e/accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True] SKIP (slow I/O)
+full:H20-3e/accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency] SKIP (slow I/O)
+full:H20-3e/test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[DeepSeek-V3-671B-FP8-DeepSeek-V3-0324-8] SKIP (slow I/O)
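
Note on the conftest.py hunk: torch.cuda.empty_cache() can only return memory blocks that PyTorch's caching allocator no longer considers in use, and a CUDA tensor keeps its block alive until the Python object owning it is actually destroyed. Calling gc.collect() first reclaims tensors that are unreferenced but not yet collected (for example, ones caught in reference cycles), so the subsequent empty_cache() can hand their memory back to the driver. A minimal sketch of the fixture as it reads after this diff; the autouse fixture decoration is an assumption, since the hunk shows only the function body:

# Sketch of the torch_empty_cache fixture after this change. Only the body
# appears in the diff; the @pytest.fixture(autouse=True) decoration shown
# here is assumed from the docstring's "before each test" wording.
import gc

import pytest
import torch


@pytest.fixture(autouse=True)
def torch_empty_cache() -> None:
    """
    Manually empty the torch CUDA cache before each test, to reduce risk of OOM errors.
    """
    if torch.cuda.is_available():
        # Collect unreachable Python objects first so any CUDA tensors they
        # still hold are freed; only then can empty_cache() release those
        # blocks from the caching allocator back to the driver.
        gc.collect()
        torch.cuda.empty_cache()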