diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor.py b/tensorrt_llm/_torch/pyexecutor/py_executor.py
index 0d2ae0a0c3..44e3f93866 100644
--- a/tensorrt_llm/_torch/pyexecutor/py_executor.py
+++ b/tensorrt_llm/_torch/pyexecutor/py_executor.py
@@ -436,7 +436,7 @@ def _profiler(self):
                 f"iteration 10-20: export {PROFILE_START_STOP_ENV_VAR_NAME}=10-20"
             )
 
-        if enable_torch_trace:
+        if enable_torch_trace and self.dist.rank == 0:
             activities = [
                 torch.profiler.ProfilerActivity.CPU,
                 torch.profiler.ProfilerActivity.CUDA,
@@ -450,7 +450,7 @@ def profile_step():
             nonlocal it, enabled, start_time
             if it in self.profile_stop_iters and not self.is_warmup:
                 assert enabled, "Inconsistent CUDA profiling state"
-                if enable_torch_trace:
+                if enable_torch_trace and self.dist.rank == 0:
                     torch_profiler.stop()
                     torch_profiler.export_chrome_trace(torch_trace_path)
                 logger.info(f"Profiling stopped at iteration {it}, "
@@ -478,7 +478,7 @@ def profile_step():
             if it in self.profile_start_iters and not self.is_warmup:
                 assert not enabled, "Inconsistent CUDA profiling state"
                 torch.cuda.cudart().cudaProfilerStart()
-                if enable_torch_trace:
+                if enable_torch_trace and self.dist.rank == 0:
                     torch_profiler.start()
                 logger.info(f"Profiling started at iteration {it}.")
                 enabled = True
@@ -489,7 +489,7 @@ def profile_step():
         finally:
             if enabled:
                 # Stop on early exit / exception
-                if enable_torch_trace:
+                if enable_torch_trace and self.dist.rank == 0:
                     torch_profiler.stop()
                     torch_profiler.export_chrome_trace(torch_trace_path)
                 logger.info(f"Profiling stopped at iteration {it}, "
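
Context for reviewers: the patch gates the torch trace profiler to rank 0, so in multi-rank runs only one process creates, stops, and exports the chrome trace instead of every rank writing to the same torch_trace_path. Below is a minimal standalone sketch of the same pattern; the plain `rank` argument standing in for the executor's `self.dist.rank`, the function name, and the stub workload are assumptions for illustration, not code from the patched file.

import torch

def run_with_trace(rank: int, trace_path: str = "trace.json", steps: int = 3):
    """Trace `steps` iterations of a stub workload, exporting from rank 0 only."""
    torch_profiler = None

    # Mirror the patch: only rank 0 creates and starts the torch profiler,
    # so a multi-rank job emits a single chrome trace instead of every rank
    # clobbering the same output file. (`rank` is a stand-in for the
    # executor's `self.dist.rank`.)
    if rank == 0:
        activities = [torch.profiler.ProfilerActivity.CPU]
        if torch.cuda.is_available():
            activities.append(torch.profiler.ProfilerActivity.CUDA)
        torch_profiler = torch.profiler.profile(activities=activities)
        torch_profiler.start()

    try:
        for _ in range(steps):
            torch.randn(64, 64) @ torch.randn(64, 64)  # stand-in workload
    finally:
        # Mirror the patch's finally block: stop and export even on an early
        # exit or exception, and only on the rank that owns the profiler.
        if torch_profiler is not None:
            torch_profiler.stop()
            torch_profiler.export_chrome_trace(trace_path)

if __name__ == "__main__":
    run_with_trace(rank=0)

Note that in the patch the cudaProfilerStart() call and the logger.info calls remain unconditional, so CUDA profiling and its per-rank bookkeeping still run everywhere; only the torch trace, whose export would collide on a shared path, is restricted to rank 0.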