From 5b298c53a48da94580af33ea57648f49379bad72 Mon Sep 17 00:00:00 2001 From: jeffnvidia Date: Sun, 19 May 2024 15:25:18 +0300 Subject: [PATCH] add new logging lines --- src/cloudai/__main__.py | 7 ++++++- src/cloudai/runner/slurm/slurm_runner.py | 2 ++ src/cloudai/runner/standalone/standalone_runner.py | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/cloudai/__main__.py b/src/cloudai/__main__.py index f063bc0a4..49bf87f90 100644 --- a/src/cloudai/__main__.py +++ b/src/cloudai/__main__.py @@ -17,6 +17,7 @@ import logging import os import sys +import traceback from cloudai import Installer, Parser, ReportGenerator, Runner, SystemObjectUpdater @@ -178,7 +179,11 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> None: test_scenario.pretty_print() runner = Runner(args.mode, system, test_scenario) - asyncio.run(runner.run()) + try: + asyncio.run(runner.run()) + except RuntimeError as e: + logging.error(traceback.format_exc()) + logging.error(f"Error running asyncio loop: {e}") print(f"All test scenario results stored at: {runner.runner.output_path}") diff --git a/src/cloudai/runner/slurm/slurm_runner.py b/src/cloudai/runner/slurm/slurm_runner.py index 91b41006b..ff3dc0d89 100644 --- a/src/cloudai/runner/slurm/slurm_runner.py +++ b/src/cloudai/runner/slurm/slurm_runner.py @@ -72,6 +72,8 @@ def _submit_test(self, test: Test) -> Optional[SlurmJob]: job_id = None if self.mode == "run": stdout, stderr = self.cmd_shell.execute(exec_cmd).communicate() + self.logger.info(f"\tstdout: {stdout}") + self.logger.info(f"\tstderr: {stderr}") job_id = test.get_job_id(stdout, stderr) else: job_id = 0 diff --git a/src/cloudai/runner/standalone/standalone_runner.py b/src/cloudai/runner/standalone/standalone_runner.py index 4aecbf6a8..eb28c7063 100644 --- a/src/cloudai/runner/standalone/standalone_runner.py +++ b/src/cloudai/runner/standalone/standalone_runner.py @@ -104,7 +104,9 @@ def is_job_completed(self, job: BaseJob) -> bool: s_job = cast(StandaloneJob, job) command = f"ps -p {s_job.id}" self.logger.debug(f"Checking job status with command: {command}") - stdout = self.cmd_shell.execute(command).communicate()[0] + stdout, stderr = self.cmd_shell.execute(command).communicate() + self.logger.info(f"\tstdout: {stdout}") + self.logger.info(f"\tstderr: {stderr}") return str(s_job.id) not in stdout def kill_job(self, job: BaseJob):