diff --git a/conf/common/test/nemo_run_llama3_8b.toml b/conf/common/test/nemo_run_llama3_8b.toml index 462a57e61..ea8e8895f 100644 --- a/conf/common/test/nemo_run_llama3_8b.toml +++ b/conf/common/test/nemo_run_llama3_8b.toml @@ -21,6 +21,7 @@ test_template_name = "NeMoRun" [cmd_args] "docker_image_url" = "nvcr.io/nvidia/nemo:24.09" "task" = "pretrain" +"recipe_path" = "" "recipe_name" = "llama3_8b" [extra_cmd_args] diff --git a/src/cloudai/schema/test_template/nemo_run/slurm_command_gen_strategy.py b/src/cloudai/schema/test_template/nemo_run/slurm_command_gen_strategy.py index ebbf13a59..2543f005e 100644 --- a/src/cloudai/schema/test_template/nemo_run/slurm_command_gen_strategy.py +++ b/src/cloudai/schema/test_template/nemo_run/slurm_command_gen_strategy.py @@ -32,6 +32,11 @@ def _parse_slurm_args( tdef: NeMoRunTestDefinition = cast(NeMoRunTestDefinition, tr.test.test_definition) base_args.update({"image_path": tdef.docker_image.installed_path}) + if tdef.cmd_args.recipe_path: + # TODO: update /opt/NeMo/nemo/collections/llm/recipes/__init__.py + target_recipe_path = f"/opt/NeMo/nemo/collections/llm/recipes/{os.path.basename(tdef.cmd_args.recipe_path)}" + container_mounts = f"{tdef.cmd_args.recipe_path}:{target_recipe_path}" + base_args["container_mounts"] = container_mounts return base_args diff --git a/src/cloudai/test_definitions/nemo_run.py b/src/cloudai/test_definitions/nemo_run.py index fa424672f..3123e2ea1 100644 --- a/src/cloudai/test_definitions/nemo_run.py +++ b/src/cloudai/test_definitions/nemo_run.py @@ -25,6 +25,7 @@ class NeMoRunCmdArgs(CmdArgs): docker_image_url: str task: str + recipe_path: Optional[str] recipe_name: str diff --git a/tests/slurm_command_gen_strategy/test_nemo_run_slurm_command_gen_strategy.py b/tests/slurm_command_gen_strategy/test_nemo_run_slurm_command_gen_strategy.py index 57ed454d7..f6dd9a539 100644 --- a/tests/slurm_command_gen_strategy/test_nemo_run_slurm_command_gen_strategy.py +++ 
b/tests/slurm_command_gen_strategy/test_nemo_run_slurm_command_gen_strategy.py @@ -34,7 +34,7 @@ def test_run(self, tmp_path: Path) -> TestRun: description="desc1", test_template_name="tt", cmd_args=NeMoRunCmdArgs( - docker_image_url="nvcr.io/nvidia/nemo:24.09", task="pretrain", recipe_name="llama_3b" + docker_image_url="nvcr.io/nvidia/nemo:24.09", task="pretrain", recipe_path=None, recipe_name="llama_3b" ), extra_env_vars={"TEST_VAR_1": "value1"}, extra_cmd_args={"extra_args": ""}, @@ -59,7 +59,7 @@ def cmd_gen_strategy(self, slurm_system: SlurmSystem) -> NeMoRunSlurmCommandGenS "cmd_args, expected_cmd", [ ( - {"docker_image_url": "nvcr.io/nvidia/nemo:24.09", "task": "fine_tune", "recipe_name": "llama7_13b"}, + {"docker_image_url": "nvcr.io/nvidia/nemo:24.09", "task": "fine_tune", "recipe_path": None, "recipe_name": "llama7_13b"}, ["nemo", "llm", "fine_tune", "--factory", "llama7_13b", "-y", "trainer.num_nodes=2", "extra_args"], ), ], @@ -90,3 +90,21 @@ def test_num_nodes(self, cmd_gen_strategy: NeMoRunSlurmCommandGenStrategy, test_ num_nodes_param = next(p for p in cmd if "trainer.num_nodes" in p) assert num_nodes_param == "trainer.num_nodes=3" + + def test_parse_slurm_args_with_recipe_path( + self, cmd_gen_strategy: NeMoRunSlurmCommandGenStrategy, test_run: TestRun + ) -> None: + # Test when recipe_path is None + test_run.test.test_definition.cmd_args.recipe_path = None + base_args = cmd_gen_strategy._parse_slurm_args("test_job", {}, {}, test_run) + assert ( + "container_mounts" not in base_args + ), "container_mounts should not be in base_args when recipe_path is None" + + # Test when recipe_path is set: the file is mounted under the container's NeMo recipes dir by basename + test_run.test.test_definition.cmd_args.recipe_path = "/some/recipe/path" + base_args = cmd_gen_strategy._parse_slurm_args("test_job", {}, {}, test_run) + assert "container_mounts" in base_args, "container_mounts should be in base_args when recipe_path is not None" + assert ( + base_args["container_mounts"] == "/some/recipe/path:/opt/NeMo/nemo/collections/llm/recipes/path" + ), 
"container_mounts should correctly mount the recipe_path" diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py index 53a22f502..f8f064639 100644 --- a/tests/test_acceptance.py +++ b/tests/test_acceptance.py @@ -211,7 +211,10 @@ def create_test_run(name, test_definition, command_gen_strategy): description=test_type, test_template_name=test_type, cmd_args=NeMoRunCmdArgs( - docker_image_url="nvcr.io/nvidia/nemo:24.09", task="pretrain", recipe_name="llama_3b" + docker_image_url="nvcr.io/nvidia/nemo:24.09", + task="pretrain", + recipe_path=None, + recipe_name="llama_3b", ), ), NeMoRunSlurmCommandGenStrategy,