diff --git a/src/cloudai/report_generator/report_generator.py b/src/cloudai/report_generator/report_generator.py index 9d7ef563..e47bef40 100644 --- a/src/cloudai/report_generator/report_generator.py +++ b/src/cloudai/report_generator/report_generator.py @@ -75,7 +75,7 @@ def _generate_test_report(self, directory_path: Path, tr: TestRun) -> None: continue if not tr.test.test_template.can_handle_directory(subdir): logging.warning( - f"Skipping '{subdir}', can't hande with " + f"Skipping '{subdir}', can't handle with " f"strategy={tr.test.test_template.report_generation_strategy}." ) continue diff --git a/src/cloudai/systems/slurm/slurm_system.py b/src/cloudai/systems/slurm/slurm_system.py index 51388ac8..2b74bfac 100644 --- a/src/cloudai/systems/slurm/slurm_system.py +++ b/src/cloudai/systems/slurm/slurm_system.py @@ -244,6 +244,9 @@ def is_job_completed(self, job: BaseJob, retry_threshold: int = 3) -> bool: raise RuntimeError(error_message) job_states = stdout.strip().split() + if "RUNNING" in job_states: + return False + if any(state in ["COMPLETED", "FAILED", "CANCELLED", "TIMEOUT"] for state in job_states): return True diff --git a/tests/test_slurm_system.py b/tests/test_slurm_system.py index 028fb89e..c89ca1ad 100644 --- a/tests/test_slurm_system.py +++ b/tests/test_slurm_system.py @@ -241,6 +241,9 @@ def test_allocate_nodes_exceeding_limit( ("TIMEOUT", "", True), ("RUNNING", "", False), ("PENDING", "", False), + ("COMPLETED RUNNING", "", False), + ("RUNNING COMPLETED", "", False), + ("COMPLETED COMPLETED", "", True), ("", "error", False), ], )