From 3f82fd57f3827a6b1e69e0560505f76fb059b70d Mon Sep 17 00:00:00 2001
From: Baizhou Zhang
Date: Sun, 20 Apr 2025 21:23:03 +0000
Subject: [PATCH] Fix bug in torch profiler

---
 tensorrt_llm/_torch/pyexecutor/py_executor.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor.py b/tensorrt_llm/_torch/pyexecutor/py_executor.py
index 661158ace0..77d39e46a3 100644
--- a/tensorrt_llm/_torch/pyexecutor/py_executor.py
+++ b/tensorrt_llm/_torch/pyexecutor/py_executor.py
@@ -432,7 +432,7 @@ def _profiler(self):
                 f"iteration 10-20: export {PROFILE_START_STOP_ENV_VAR_NAME}=10-20"
             )
 
-        if enable_torch_trace:
+        if enable_torch_trace and self.dist.rank == 0:
             activities = [
                 torch.profiler.ProfilerActivity.CPU,
                 torch.profiler.ProfilerActivity.CUDA,
@@ -446,7 +446,7 @@ def profile_step():
             nonlocal it, enabled, start_time
             if it in self.profile_stop_iters:
                 assert enabled, "Inconsistent CUDA profiling state"
-                if enable_torch_trace:
+                if enable_torch_trace and self.dist.rank == 0:
                     torch_profiler.stop()
                     torch_profiler.export_chrome_trace(torch_trace_path)
                 logger.info(f"Profiling stopped at iteration {it}, "
@@ -473,7 +473,7 @@ def profile_step():
             if it in self.profile_start_iters:
                 assert not enabled, "Inconsistent CUDA profiling state"
                 torch.cuda.cudart().cudaProfilerStart()
-                if enable_torch_trace:
+                if enable_torch_trace and self.dist.rank == 0:
                     torch_profiler.start()
                 logger.info(f"Profiling started at iteration {it}.")
                 enabled = True
@@ -484,7 +484,7 @@ def profile_step():
         finally:
             if enabled:
                 # Stop on early exit / exception
-                if enable_torch_trace:
+                if enable_torch_trace and self.dist.rank == 0:
                     torch_profiler.stop()
                     torch_profiler.export_chrome_trace(torch_trace_path)
                     logger.info(f"Profiling stopped at iteration {it}, "
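
Note on the change: every torch.profiler call (construct, start, stop, export) is now gated on
self.dist.rank == 0, so only the first rank records and exports a Chrome trace, while the CUDA
profiler hooks (cudaProfilerStart/Stop) still run on every rank. Restricting the torch trace to
rank 0 presumably avoids redundant profiler overhead and multiple ranks writing the same
torch_trace_path. A minimal sketch of the same rank-0-gated pattern is below; the RANK environment
variable, the trace path, the iteration windows, and the loop body are illustrative assumptions,
not the executor's actual API.

    import os
    import torch

    # Illustrative stand-in for self.dist.rank; in the patch the rank comes from
    # the executor's distributed state, not from an environment variable.
    rank = int(os.environ.get("RANK", "0"))

    enable_torch_trace = True
    torch_trace_path = "trace_rank0.json"  # hypothetical output path

    torch_profiler = None
    if enable_torch_trace and rank == 0:
        # Only rank 0 constructs the torch profiler, mirroring the patched condition.
        torch_profiler = torch.profiler.profile(activities=[
            torch.profiler.ProfilerActivity.CPU,
            torch.profiler.ProfilerActivity.CUDA,
        ])

    profile_start_iters, profile_stop_iters = {10}, {20}  # assumed iteration window

    for it in range(30):
        if it in profile_start_iters and torch_profiler is not None:
            torch_profiler.start()  # start tracing on rank 0 only
        # ... one engine iteration would run here ...
        if it in profile_stop_iters and torch_profiler is not None:
            torch_profiler.stop()  # stop and export on rank 0 only
            torch_profiler.export_chrome_trace(torch_trace_path)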