Skip to content

Commit

Permalink
add new logging lines
Browse files (browse the repository at this point in the history)
  • Loading branch information
jeffnvidia committed May 19, 2024
1 parent cb937ef commit d48a6b6
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 2 deletions.
7 changes: 6 additions & 1 deletion src/cloudai/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import logging
import os
import sys
import traceback

from cloudai import Installer, Parser, ReportGenerator, Runner, SystemObjectUpdater

Expand Down Expand Up @@ -177,7 +178,11 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> None:
test_scenario.pretty_print()

runner = Runner(args.mode, system, test_scenario)
asyncio.run(runner.run())
try:
asyncio.run(runner.run())
except RuntimeError as e:
logging.error(traceback.format_exc())
logging.error(f"Error running asyncio loop: {e}")

print(f"All test scenario results stored at: {runner.runner.output_path}")

Expand Down
2 changes: 2 additions & 0 deletions src/cloudai/runner/slurm/slurm_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ def _submit_test(self, test: Test) -> Optional[SlurmJob]:
job_id = None
if self.mode == "run":
stdout, stderr = self.cmd_shell.execute(exec_cmd).communicate()
self.logger.info(f"\tstdout: {stdout}")
self.logger.info(f"\tstderr: {stderr}")
job_id = test.get_job_id(stdout, stderr)
else:
job_id = 0
Expand Down
4 changes: 3 additions & 1 deletion src/cloudai/runner/standalone/standalone_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ def is_job_completed(self, job: BaseJob) -> bool:
s_job = cast(StandaloneJob, job)
command = f"ps -p {s_job.id}"
self.logger.debug(f"Checking job status with command: {command}")
stdout = self.cmd_shell.execute(command).communicate()[0]
stdout, stderr = self.cmd_shell.execute(command).communicate()
self.logger.info(f"\tstdout: {stdout}")
self.logger.info(f"\tstderr: {stderr}")
return str(s_job.id) not in stdout

def kill_job(self, job: BaseJob):
Expand Down

0 comments on commit d48a6b6

Please sign in to comment.