Skip to content

Commit

Permalink
Support custom recipes in NeMoRun
Browse files Browse the repository at this point in the history
  • Loading branch information
TaekyungHeo committed Jan 8, 2025
1 parent 1bfdef8 commit 1f702f0
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 3 deletions.
1 change: 1 addition & 0 deletions conf/common/test/nemo_run_llama3_8b.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ test_template_name = "NeMoRun"
[cmd_args]
"docker_image_url" = "nvcr.io/nvidia/nemo:24.09"
"task" = "pretrain"
"recipe_path" = ""
"recipe_name" = "llama3_8b"

[extra_cmd_args]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# limitations under the License.


import os
from typing import Any, Dict, List, cast

from cloudai import TestRun
Expand All @@ -32,6 +33,11 @@ def _parse_slurm_args(

tdef: NeMoRunTestDefinition = cast(NeMoRunTestDefinition, tr.test.test_definition)
base_args.update({"image_path": tdef.docker_image.installed_path})
if tdef.cmd_args.recipe_path:
# TODO: update /opt/NeMo/nemo/collections/llm/recipes/__init__.py
target_recipe_path = f"/opt/NeMo/nemo/collections/llm/recipes/{os.path.basename(tdef.cmd_args.recipe_path)}"
container_mounts = f"{tdef.cmd_args.recipe_path}:{target_recipe_path}"
base_args["container_mounts"] = container_mounts

return base_args

Expand Down
1 change: 1 addition & 0 deletions src/cloudai/test_definitions/nemo_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class NeMoRunCmdArgs(CmdArgs):

docker_image_url: str
task: str
recipe_path: Optional[str]
recipe_name: str


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_run(self, tmp_path: Path) -> TestRun:
description="desc1",
test_template_name="tt",
cmd_args=NeMoRunCmdArgs(
docker_image_url="nvcr.io/nvidia/nemo:24.09", task="pretrain", recipe_name="llama_3b"
docker_image_url="nvcr.io/nvidia/nemo:24.09", task="pretrain", recipe_path=None, recipe_name="llama_3b"
),
extra_env_vars={"TEST_VAR_1": "value1"},
extra_cmd_args={"extra_args": ""},
Expand All @@ -59,7 +59,12 @@ def cmd_gen_strategy(self, slurm_system: SlurmSystem) -> NeMoRunSlurmCommandGenS
"cmd_args, expected_cmd",
[
(
{"docker_image_url": "nvcr.io/nvidia/nemo:24.09", "task": "fine_tune", "recipe_name": "llama7_13b"},
{
"docker_image_url": "nvcr.io/nvidia/nemo:24.09",
"task": "fine_tune",
"recipe_path": None,
"recipe_name": "llama7_13b",
},
["nemo", "llm", "fine_tune", "--factory", "llama7_13b", "-y", "trainer.num_nodes=2", "extra_args"],
),
],
Expand Down Expand Up @@ -90,3 +95,18 @@ def test_num_nodes(self, cmd_gen_strategy: NeMoRunSlurmCommandGenStrategy, test_

num_nodes_param = next(p for p in cmd if "trainer.num_nodes" in p)
assert num_nodes_param == "trainer.num_nodes=3"

def test_parse_slurm_args_without_recipe_path(
    self,
    cmd_gen_strategy: NeMoRunSlurmCommandGenStrategy,
    test_run: TestRun,
) -> None:
    """Check that no ``container_mounts`` entry is produced when ``recipe_path`` is ``None``."""
    # Explicitly clear the recipe path so the result does not depend on the fixture's default.
    cmd_args = test_run.test.test_definition.cmd_args
    cmd_args.recipe_path = None

    slurm_args = cmd_gen_strategy._parse_slurm_args("test_job", {}, {}, test_run)

    assert "container_mounts" not in slurm_args

def test_parse_slurm_args_with_recipe_path(
    self, cmd_gen_strategy: NeMoRunSlurmCommandGenStrategy, test_run: TestRun
) -> None:
    """Check that a custom recipe is mounted into the container's NeMo recipes directory.

    When ``recipe_path`` is set, ``_parse_slurm_args`` builds the mount as
    ``<recipe_path>:/opt/NeMo/nemo/collections/llm/recipes/<basename of recipe_path>``,
    so the container-side target is NOT the same as the host path.
    """
    recipe_path = "/some/recipe/path"
    test_run.test.test_definition.cmd_args.recipe_path = recipe_path

    base_args = cmd_gen_strategy._parse_slurm_args("test_job", {}, {}, test_run)

    # Host path on the left; on the right, the recipes directory plus the host
    # path's basename ("path"), matching what the strategy generates.
    expected_mount = f"{recipe_path}:/opt/NeMo/nemo/collections/llm/recipes/path"
    assert "container_mounts" in base_args
    assert base_args["container_mounts"] == expected_mount
5 changes: 4 additions & 1 deletion tests/test_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,10 @@ def create_test_run(name, test_definition, command_gen_strategy):
description=test_type,
test_template_name=test_type,
cmd_args=NeMoRunCmdArgs(
docker_image_url="nvcr.io/nvidia/nemo:24.09", task="pretrain", recipe_name="llama_3b"
docker_image_url="nvcr.io/nvidia/nemo:24.09",
task="pretrain",
recipe_path=None,
recipe_name="llama_3b",
),
),
NeMoRunSlurmCommandGenStrategy,
Expand Down

0 comments on commit 1f702f0

Please sign in to comment.