Skip to content

Commit

Permalink
Bug fix in job completion checks (#340)
Browse files (browse the repository at this point in the history)
* Fix a typo in src/cloudai/report_generator/report_generator.py

* Bug fix in job completion check
  • Loading branch information
TaekyungHeo authored Jan 15, 2025
1 parent aac7f6f commit d2cc9b4
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/cloudai/report_generator/report_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _generate_test_report(self, directory_path: Path, tr: TestRun) -> None:
continue
if not tr.test.test_template.can_handle_directory(subdir):
logging.warning(
- f"Skipping '{subdir}', can't hande with "
+ f"Skipping '{subdir}', can't handle with "
f"strategy={tr.test.test_template.report_generation_strategy}."
)
continue
Expand Down
3 changes: 3 additions & 0 deletions src/cloudai/systems/slurm/slurm_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ def is_job_completed(self, job: BaseJob, retry_threshold: int = 3) -> bool:
raise RuntimeError(error_message)

job_states = stdout.strip().split()
+ if "RUNNING" in job_states:
+ return False
+
if any(state in ["COMPLETED", "FAILED", "CANCELLED", "TIMEOUT"] for state in job_states):
return True

Expand Down
3 changes: 3 additions & 0 deletions tests/test_slurm_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ def test_allocate_nodes_exceeding_limit(
("TIMEOUT", "", True),
("RUNNING", "", False),
("PENDING", "", False),
+ ("COMPLETED RUNNING", "", False),
+ ("RUNNING COMPLETED", "", False),
+ ("COMPLETED COMPLETED", "", True),
("", "error", False),
],
)
Expand Down

0 comments on commit d2cc9b4

Please sign in to comment.