From d945201972c520442f191fd76d6c994a34270a51 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 02:17:42 -0800 Subject: [PATCH 01/21] agent environment intergation with runner --- src/cloudai/_core/configurator/cloudai_gym.py | 4 +-- src/cloudai/cli/handlers.py | 36 +++++++++++++------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/cloudai/_core/configurator/cloudai_gym.py b/src/cloudai/_core/configurator/cloudai_gym.py index 6e4e2b84..b3b87d75 100644 --- a/src/cloudai/_core/configurator/cloudai_gym.py +++ b/src/cloudai/_core/configurator/cloudai_gym.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Optional, Tuple +from typing import Any, Dict, Optional, Tuple import gymnasium as gym import numpy as np @@ -46,7 +46,7 @@ def __init__(self, test_run: TestRun, system: SlurmSystem, test_scenario: TestSc self.action_space = self.extract_action_space(self.test_run.test.cmd_args) self.observation_space = self.define_observation_space() - def extract_action_space(self, cmd_args: dict) -> spaces.Dict: + def extract_action_space(self, cmd_args: dict) -> Dict[str, Any]: """ Extract the action space from the cmd_args dictionary. diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index 49814c21..8ea3d481 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -22,6 +22,9 @@ from unittest.mock import Mock from cloudai import Installable, Parser, Registry, ReportGenerator, Runner, System +from cloudai._core.configurator.agents.grid_search import GridSearchAgent +from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv +from cloudai.systems.slurm.slurm_system import SlurmSystem from ..parser import HOOK_ROOT @@ -93,6 +96,7 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: """ parser = Parser(args.system_config) system, tests, test_scenario = parser.parse(args.tests_dir, args.test_scenario) + assert test_scenario is not None if args.output_dir: @@ -125,19 +129,29 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: logging.info(test_scenario.pretty_print()) - runner = Runner(args.mode, system, test_scenario) - asyncio.run(runner.run()) + tr = test_scenario.test_runs[0] - logging.info(f"All test scenario results stored at: {runner.runner.output_path}") + agent = GridSearchAgent(tr) + env = CloudAIGymEnv(test_run=tr, system=SlurmSystem(system), test_scenario=test_scenario) - if args.mode == "run": - generator = ReportGenerator(runner.runner.output_path) - generator.generate_report(test_scenario) - logging.info( - "All test scenario execution attempts are complete. Please review" - f" the '{args.log_file}' file to confirm successful completion or to" - " identify any issues." - ) + agent.configure(env.action_space) + + for action in agent.get_all_combinations(): + for key, value in action.items(): + tr.test.test_definition.cmd_args_dict[key] = value + runner = Runner(args.mode, system, test_scenario) + asyncio.run(runner.run()) + + logging.info(f"All test scenario results stored at: {runner.runner.output_path}") + + if args.mode == "run": + generator = ReportGenerator(runner.runner.output_path) + generator.generate_report(test_scenario) + logging.info( + "All test scenario execution attempts are complete. Please review" + f" the '{args.log_file}' file to confirm successful completion or to" + " identify any issues." + ) return 0 From 1be4398c7db97629cf310c136447cc6e5b6b5bb8 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 03:10:43 -0800 Subject: [PATCH 02/21] more fixes --- src/cloudai/_core/configurator/cloudai_gym.py | 24 ++++++++++++++++++- src/cloudai/cli/handlers.py | 8 ++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/cloudai/_core/configurator/cloudai_gym.py b/src/cloudai/_core/configurator/cloudai_gym.py index b3b87d75..c012ae79 100644 --- a/src/cloudai/_core/configurator/cloudai_gym.py +++ b/src/cloudai/_core/configurator/cloudai_gym.py @@ -19,11 +19,33 @@ import gymnasium as gym import numpy as np from gymnasium import spaces +from gymnasium.spaces import Space from cloudai._core.test_scenario import TestRun, TestScenario from cloudai.systems import SlurmSystem +class DictSpace(Space): + """ + A custom space that wraps a dictionary of spaces. + + Args: + space_dict (Dict[str, Any]): A dictionary of spaces. + """ + + def __init__(self, space_dict: Dict[str, Any]): + self.space_dict = space_dict + super().__init__((), None) + + def sample(self, mask: Optional[np.ndarray] = None): + # Implement sampling logic if needed + pass + + def contains(self, x) -> bool: + # Implement containment logic if needed + return True + + class CloudAIGymEnv(gym.Env): """ Custom Gym environment for CloudAI integration. @@ -46,7 +68,7 @@ def __init__(self, test_run: TestRun, system: SlurmSystem, test_scenario: TestSc self.action_space = self.extract_action_space(self.test_run.test.cmd_args) self.observation_space = self.define_observation_space() - def extract_action_space(self, cmd_args: dict) -> Dict[str, Any]: + def extract_action_space(self, cmd_args: dict): """ Extract the action space from the cmd_args dictionary. diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index 8ea3d481..89a3e605 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -24,7 +24,6 @@ from cloudai import Installable, Parser, Registry, ReportGenerator, Runner, System from cloudai._core.configurator.agents.grid_search import GridSearchAgent from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv -from cloudai.systems.slurm.slurm_system import SlurmSystem from ..parser import HOOK_ROOT @@ -132,9 +131,12 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: tr = test_scenario.test_runs[0] agent = GridSearchAgent(tr) - env = CloudAIGymEnv(test_run=tr, system=SlurmSystem(system), test_scenario=test_scenario) + env = CloudAIGymEnv(test_run=tr, system=system, test_scenario=test_scenario) - agent.configure(env.action_space) + # Convert env.action_space to a dictionary + action_space_dict = {key: space for key, space in env.action_space.spaces.items()} + + agent.configure(action_space_dict) for action in agent.get_all_combinations(): for key, value in action.items(): From 4df4ab974ac22ad0aaf04355213e41a2fefb20ef Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 11:04:20 -0800 Subject: [PATCH 03/21] Remove Farma gym dependies for more control over types + other fixes in pyright --- src/cloudai/_core/configurator/base_gym.py | 102 +++++++++++++++++ src/cloudai/_core/configurator/cloudai_gym.py | 107 ++++++++---------- src/cloudai/cli/handlers.py | 5 +- 3 files changed, 148 insertions(+), 66 deletions(-) create mode 100644 src/cloudai/_core/configurator/base_gym.py diff --git a/src/cloudai/_core/configurator/base_gym.py b/src/cloudai/_core/configurator/base_gym.py new file mode 100644 index 00000000..b985f46c --- /dev/null +++ b/src/cloudai/_core/configurator/base_gym.py @@ -0,0 +1,102 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod +from typing import Any, Dict, Optional, Tuple + + +class BaseGym(ABC): + """Base class for CloudAI Gym environments.""" + + def __init__(self): + """Initialize the CloudAIGym environment.""" + self.action_space = self.define_action_space() + self.observation_space = self.define_observation_space() + + @abstractmethod + def define_action_space(self) -> Dict[str, Any]: + """ + Define the action space for the environment. + + Returns: + Dict[str, Any]: The action space. + """ + pass + + @abstractmethod + def define_observation_space(self) -> list: + """ + Define the observation space for the environment. + + Returns: + list: The observation space. + """ + pass + + @abstractmethod + def reset( + self, seed: Optional[int] = None, options: Optional[dict[str, Any]] = None + ) -> Tuple[list, dict[str, Any]]: + """ + Reset the environment. + + Args: + seed (Optional[int]): Seed for the environment's random number generator. + options (Optional[dict]): Additional options for reset. + + Returns: + Tuple: A tuple containing: + - observation (list): Initial observation. + - info (dict): Additional info for debugging. + """ + pass + + @abstractmethod + def step(self, action: Any) -> Tuple[list, float, bool, dict]: + """ + Execute one step in the environment. + + Args: + action (Any): Action chosen by the agent. + + Returns: + Tuple: A tuple containing: + - observation (list): Updated system state. + - reward (float): Reward for the action taken. + - done (bool): Whether the episode is done. + - info (dict): Additional info for debugging. + """ + pass + + @abstractmethod + def render(self, mode: str = "human"): + """ + Render the current state of the environment. + + Args: + mode (str): The mode to render with. Default is "human". + """ + pass + + @abstractmethod + def seed(self, seed: Optional[int] = None): + """ + Set the seed for the environment's random number generator. + + Args: + seed (Optional[int]): Seed for the environment's random number generator. + """ + pass diff --git a/src/cloudai/_core/configurator/cloudai_gym.py b/src/cloudai/_core/configurator/cloudai_gym.py index c012ae79..4205e3f9 100644 --- a/src/cloudai/_core/configurator/cloudai_gym.py +++ b/src/cloudai/_core/configurator/cloudai_gym.py @@ -16,44 +16,21 @@ from typing import Any, Dict, Optional, Tuple -import gymnasium as gym import numpy as np -from gymnasium import spaces -from gymnasium.spaces import Space +from cloudai import System +from cloudai._core.configurator.base_gym import BaseGym from cloudai._core.test_scenario import TestRun, TestScenario -from cloudai.systems import SlurmSystem -class DictSpace(Space): - """ - A custom space that wraps a dictionary of spaces. - - Args: - space_dict (Dict[str, Any]): A dictionary of spaces. - """ - - def __init__(self, space_dict: Dict[str, Any]): - self.space_dict = space_dict - super().__init__((), None) - - def sample(self, mask: Optional[np.ndarray] = None): - # Implement sampling logic if needed - pass - - def contains(self, x) -> bool: - # Implement containment logic if needed - return True - - -class CloudAIGymEnv(gym.Env): +class CloudAIGymEnv(BaseGym): """ Custom Gym environment for CloudAI integration. Uses the TestRun object and actual runner methods to execute jobs. """ - def __init__(self, test_run: TestRun, system: SlurmSystem, test_scenario: TestScenario): + def __init__(self, test_run: TestRun, system: System, test_scenario: TestScenario): """ Initialize the Gym environment using the TestRun object. @@ -62,47 +39,40 @@ def __init__(self, test_run: TestRun, system: SlurmSystem, test_scenario: TestSc system (SlurmSystem): The system configuration for running the tests. test_scenario (TestScenario): The test scenario configuration. """ - super(CloudAIGymEnv, self).__init__() self.test_run = test_run + self.system = system + self.test_scenario = test_scenario + super().__init__() - self.action_space = self.extract_action_space(self.test_run.test.cmd_args) - self.observation_space = self.define_observation_space() - - def extract_action_space(self, cmd_args: dict): + def define_action_space(self) -> Dict[str, Any]: """ - Extract the action space from the cmd_args dictionary. - - Args: - cmd_args (dict): The command arguments dictionary from the TestRun object. + Define the action space for the environment. Returns: - spaces.Dict: A dictionary containing the action space variables and their feasible values. + Dict[str, Any]: The action space. """ action_space = {} - for key, value in cmd_args.items(): + for key, value in self.test_run.test.cmd_args.items(): if isinstance(value, list): - action_space[key] = spaces.Discrete(len(value)) + action_space[key] = len(value) elif isinstance(value, dict): for sub_key, sub_value in value.items(): if isinstance(sub_value, list): - action_space[f"{key}.{sub_key}"] = spaces.Discrete(len(sub_value)) - return spaces.Dict(action_space) + action_space[f"{key}.{sub_key}"] = len(sub_value) + return action_space - def define_observation_space(self) -> spaces.Space: + def define_observation_space(self) -> list: """ Define the observation space for the environment. Returns: - spaces.Space: The observation space. + list: The observation space. """ - return spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32) + return [0.0] def reset( - self, - *, - seed: Optional[int] = None, - options: Optional[dict[str, Any]] = None, - ) -> Tuple[np.ndarray, dict[str, Any]]: + self, seed: Optional[int] = None, options: Optional[dict[str, Any]] = None + ) -> Tuple[list, dict[str, Any]]: """ Reset the environment and reinitialize the TestRun. @@ -112,37 +82,37 @@ def reset( Returns: Tuple: A tuple containing: - - observation (np.ndarray): Initial observation. + - observation (list): Initial observation. - info (dict): Additional info for debugging. """ - super().reset(seed=seed, options=options) + if seed is not None: + np.random.seed(seed) self.test_run.current_iteration = 0 - observation = np.array([0.0], dtype=np.float32) + observation = [0.0] info = {} return observation, info - def step(self, action: np.ndarray) -> tuple: + def step(self, action: Any) -> Tuple[list, float, bool, dict]: """ Execute one step in the environment. Args: - action (np.ndarray): Action chosen by the agent. + action (Any): Action chosen by the agent. Returns: - tuple: A tuple containing: - - observation (np.ndarray): Updated system state. + Tuple: A tuple containing: + - observation (list): Updated system state. - reward (float): Reward for the action taken. - done (bool): Whether the episode is done. - info (dict): Additional info for debugging. """ observation = self.get_observation(action) - reward = 0.0 + reward = self.compute_reward() done = False info = {} - return observation, reward, done, info - def render(self, mode="human"): + def render(self, mode: str = "human"): """ Render the current state of the TestRun. @@ -151,6 +121,16 @@ def render(self, mode="human"): """ print(f"Step {self.test_run.current_iteration}: Parameters {self.test_run.test.cmd_args}") + def seed(self, seed: Optional[int] = None): + """ + Set the seed for the environment's random number generator. + + Args: + seed (Optional[int]): Seed for the environment's random number generator. + """ + if seed is not None: + np.random.seed(seed) + def compute_reward(self) -> float: """ Compute a reward based on the TestRun result. @@ -160,12 +140,15 @@ def compute_reward(self) -> float: """ return 0.0 - def get_observation(self, action) -> np.ndarray: + def get_observation(self, action: Any) -> list: """ Get the observation from the TestRun object. + Args: + action (Any): Action taken by the agent. + Returns: - np.ndarray: A scalar value representing the observation. + list: The observation. """ obs = action * 0.5 - return np.array([obs], dtype=np.float32) + return [obs] diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index 89a3e605..c4663975 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -133,10 +133,7 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: agent = GridSearchAgent(tr) env = CloudAIGymEnv(test_run=tr, system=system, test_scenario=test_scenario) - # Convert env.action_space to a dictionary - action_space_dict = {key: space for key, space in env.action_space.spaces.items()} - - agent.configure(action_space_dict) + agent.configure(env.action_space) for action in agent.get_all_combinations(): for key, value in action.items(): From e6905f72b3ec862930599267053fb9fa11a38554 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 11:06:23 -0800 Subject: [PATCH 04/21] vulture fix --- src/cloudai/_core/configurator/base_gym.py | 2 +- src/cloudai/_core/configurator/cloudai_gym.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cloudai/_core/configurator/base_gym.py b/src/cloudai/_core/configurator/base_gym.py index b985f46c..fff4ffe5 100644 --- a/src/cloudai/_core/configurator/base_gym.py +++ b/src/cloudai/_core/configurator/base_gym.py @@ -48,7 +48,7 @@ def define_observation_space(self) -> list: @abstractmethod def reset( - self, seed: Optional[int] = None, options: Optional[dict[str, Any]] = None + self, seed: Optional[int] = None, _options: Optional[dict[str, Any]] = None ) -> Tuple[list, dict[str, Any]]: """ Reset the environment. diff --git a/src/cloudai/_core/configurator/cloudai_gym.py b/src/cloudai/_core/configurator/cloudai_gym.py index 4205e3f9..7569e899 100644 --- a/src/cloudai/_core/configurator/cloudai_gym.py +++ b/src/cloudai/_core/configurator/cloudai_gym.py @@ -71,7 +71,7 @@ def define_observation_space(self) -> list: return [0.0] def reset( - self, seed: Optional[int] = None, options: Optional[dict[str, Any]] = None + self, seed: Optional[int] = None, _options: Optional[dict[str, Any]] = None ) -> Tuple[list, dict[str, Any]]: """ Reset the environment and reinitialize the TestRun. From 177694f693e1fa9a102fc264802c7f0131868e2d Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 11:18:32 -0800 Subject: [PATCH 05/21] remove farma gym dependencies + update the pytest for cloudai_gym --- requirements.txt | 1 - tests/test_cloudaigym.py | 51 ++++++++++------------------------------ 2 files changed, 13 insertions(+), 39 deletions(-) diff --git a/requirements.txt b/requirements.txt index d9ec54c0..ddaf06e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -gymnasium @ git+https://github.com/Farama-Foundation/Gymnasium/@v1.0.0a2 bokeh==3.4.1 pandas==2.2.1 tbparse==0.0.8 diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py index 2d1e22bc..5049863e 100644 --- a/tests/test_cloudaigym.py +++ b/tests/test_cloudaigym.py @@ -1,24 +1,6 @@ -# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES -# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from unittest.mock import MagicMock -import numpy as np import pytest -from gymnasium.spaces import Box, Dict, Discrete from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv from cloudai._core.test_scenario import TestRun, TestScenario @@ -48,32 +30,25 @@ def setup_env(): def test_action_space_nccl(setup_env): test_run, system, test_scenario = setup_env env = CloudAIGymEnv(test_run=test_run, system=system, test_scenario=test_scenario) - assert isinstance(env.action_space, Dict) + action_space = env.define_action_space() - expected_action_space = Dict( - { - "iters": Discrete(2), - "maxbytes": Discrete(2), - "minbytes": Discrete(4), - "ngpus": Discrete(1), - } - ) + expected_action_space = { + "iters": 2, + "maxbytes": 2, + "minbytes": 4, + "ngpus": 1, + } - assert env.action_space.spaces.keys() == expected_action_space.spaces.keys() - for key in expected_action_space.spaces: - assert isinstance(env.action_space.spaces[key], Discrete) - assert isinstance(expected_action_space.spaces[key], Discrete) - assert env.action_space.spaces[key].__dict__ == expected_action_space.spaces[key].__dict__ + assert action_space.keys() == expected_action_space.keys() + for key in expected_action_space: + assert action_space[key] == expected_action_space[key] def test_observation_space(setup_env): test_run, system, test_scenario = setup_env env = CloudAIGymEnv(test_run=test_run, system=system, test_scenario=test_scenario) - assert isinstance(env.observation_space, Box) + observation_space = env.define_observation_space() - expected_observation_space = Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32) + expected_observation_space = [0.0] - assert env.observation_space.shape == expected_observation_space.shape - assert env.observation_space.dtype == expected_observation_space.dtype - assert np.all(env.observation_space.low == expected_observation_space.low) - assert np.all(env.observation_space.high == expected_observation_space.high) + assert observation_space == expected_observation_space From b10dbfb98a61b7ce32e512f3beb0685c9fe0cfaa Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 11:20:14 -0800 Subject: [PATCH 06/21] remove farma gym from pyproject --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2321a82c..5f5a5d77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,6 @@ name = "cloudai" dynamic = ["version"] dependencies = [ - "gymnasium @ git+https://github.com/Farama-Foundation/Gymnasium/@v1.0.0a2", "bokeh==3.4.1", "pandas==2.2.1", "tbparse==0.0.8", From 15be693be356c0bfb8a74198cd179d75e3f73060 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 11:22:48 -0800 Subject: [PATCH 07/21] fix the copyright headers checks --- tests/test_cloudaigym.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py index 5049863e..9585f3f6 100644 --- a/tests/test_cloudaigym.py +++ b/tests/test_cloudaigym.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from unittest.mock import MagicMock import pytest From d5d1e14babc68320dcb00edff44f265ec1f1c18f Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 11:35:51 -0800 Subject: [PATCH 08/21] use iterators to avoid indexing errors. --- src/cloudai/cli/handlers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index c4663975..ebb5fdd5 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -128,7 +128,10 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: logging.info(test_scenario.pretty_print()) - tr = test_scenario.test_runs[0] + tr = next(iter(test_scenario.test_runs), None) + if tr is None: + logging.error("No test runs found in the test scenario.") + return 1 agent = GridSearchAgent(tr) env = CloudAIGymEnv(test_run=tr, system=system, test_scenario=test_scenario) From 96ab05537d65be12b72980399b759d27c129316a Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 12:13:47 -0800 Subject: [PATCH 09/21] helper method for manipulating the TestRun object directly --- src/cloudai/cli/handlers.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index ebb5fdd5..aedcde0f 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -140,7 +140,7 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: for action in agent.get_all_combinations(): for key, value in action.items(): - tr.test.test_definition.cmd_args_dict[key] = value + update_nested_attr(tr.test.test_definition.cmd_args, key, value) runner = Runner(args.mode, system, test_scenario) asyncio.run(runner.run()) @@ -158,6 +158,21 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: return 0 +def update_nested_attr(obj, attr_path, value): + """Update a nested attribute of an object.""" + attrs = attr_path.split(".") + # hot fix. Will be removed after the issue is fixed in the codebase + prefix = "Grok" + if attrs[0] == prefix: + attrs = attrs[1:] + for attr in attrs[:-1]: + if hasattr(obj, attr): + obj = getattr(obj, attr) + else: + raise AttributeError(f"{type(obj).__name__!r} object has no attribute {attr!r}") + setattr(obj, attrs[-1], value) + + def handle_generate_report(args: argparse.Namespace) -> int: """ Generate a report based on the existing configuration and test results. From 8cab45096c22a1ec6fdecffff98fe897baf069b5 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 14:56:52 -0800 Subject: [PATCH 10/21] Modifcations for storing dse results --- src/cloudai/_core/base_runner.py | 23 ++++++++++++----------- src/cloudai/cli/handlers.py | 3 ++- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/cloudai/_core/base_runner.py b/src/cloudai/_core/base_runner.py index 6eba2d69..72aeb6d9 100644 --- a/src/cloudai/_core/base_runner.py +++ b/src/cloudai/_core/base_runner.py @@ -74,20 +74,19 @@ def __init__(self, mode: str, system: System, test_scenario: TestScenario): def setup_output_directory(self, base_output_path: Path) -> Path: """ - Set up and return the output directory path for the runner instance. + Set up and return the base output directory path for the runner instance. Args: base_output_path (Path): The base output directory. Returns: - Path: The path to the output directory. + Path: The path to the base output directory. """ - if not base_output_path.exists(): - base_output_path.mkdir() - current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - output_subpath = base_output_path / f"{self.test_scenario.name}_{current_time}" - output_subpath.mkdir() - return output_subpath + base_output__path_with_name = base_output_path / self.test_scenario.name + if not base_output__path_with_name.exists(): + base_output__path_with_name = base_output__path_with_name + base_output__path_with_name.mkdir(parents=True, exist_ok=True) + return base_output__path_with_name def register_signal_handlers(self): """Register signal handlers for handling termination-related signals.""" @@ -264,10 +263,12 @@ def get_job_output_path(self, tr: TestRun) -> Path: job_output_path = Path() # avoid reportPossiblyUnboundVariable from pyright try: - test_output_path = self.output_path / tr.name - test_output_path.mkdir() + iteration_path = self.output_path / f"iteration_{tr.dse_iteration}" + iteration_path.mkdir(parents=True, exist_ok=True) + test_output_path = iteration_path / f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}" / tr.name + test_output_path.mkdir(parents=True, exist_ok=True) job_output_path = test_output_path / str(tr.current_iteration) - job_output_path.mkdir() + job_output_path.mkdir(parents=True, exist_ok=True) except PermissionError as e: raise PermissionError(f"Cannot create directory {job_output_path}: {e}") from e diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index aedcde0f..e17596dc 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -138,7 +138,8 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: agent.configure(env.action_space) - for action in agent.get_all_combinations(): + for dse_iteration, action in enumerate(agent.get_all_combinations(), start=1): + tr.dse_iteration = dse_iteration for key, value in action.items(): update_nested_attr(tr.test.test_definition.cmd_args, key, value) runner = Runner(args.mode, system, test_scenario) From 0acf43ec89619f472ff8b768cafdda85981195bc Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 15:07:01 -0800 Subject: [PATCH 11/21] add dse_iteration to TestRun object --- src/cloudai/_core/test_scenario.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index 39f1bd21..99264ade 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -53,6 +53,7 @@ class TestRun: output_path: Path = Path("") iterations: int = 1 current_iteration: int = 0 + dse_iteration: int = 0 time_limit: Optional[str] = None sol: Optional[float] = None weight: float = 0.0 From 55c203adcb572f0f7b0f20fca22ff8fc6d9943d5 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 19:03:01 -0800 Subject: [PATCH 12/21] safety valve to seperate the dse execution with benchmarking execution --- src/cloudai/cli/handlers.py | 83 +++++++++++++++++++++++++--------- tests/test_job_type_handler.py | 39 ++++++++++++++++ 2 files changed, 100 insertions(+), 22 deletions(-) create mode 100644 tests/test_job_type_handler.py diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index e17596dc..baef723e 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -84,6 +84,63 @@ def handle_install_and_uninstall(args: argparse.Namespace) -> int: return rc +def is_dse_job(cmd_args): + """ + Recursively check if any value in cmd_args is a list. + + Args: + cmd_args (dict): The command arguments to check. + + Returns: + bool: True if any value is a list, False otherwise. + """ + if isinstance(cmd_args, dict): + for _key, value in cmd_args.items(): + if isinstance(value, list) or (isinstance(value, dict) and is_dse_job(value)): + return True + return False + + +def handle_dse_job(tr, system, test_scenario, args): + agent = GridSearchAgent(tr) + env = CloudAIGymEnv(test_run=tr, system=system, test_scenario=test_scenario) + agent.configure(env.action_space) + + for dse_iteration, action in enumerate(agent.get_all_combinations(), start=1): + tr.dse_iteration = dse_iteration + for key, value in action.items(): + update_nested_attr(tr.test.test_definition.cmd_args, key, value) + runner = Runner(args.mode, system, test_scenario) + asyncio.run(runner.run()) + + logging.info(f"All test scenario results stored at: {runner.runner.output_path}") + + if args.mode == "run": + generator = ReportGenerator(runner.runner.output_path) + generator.generate_report(test_scenario) + logging.info( + "All test scenario execution attempts are complete. Please review" + f" the '{args.log_file}' file to confirm successful completion or to" + " identify any issues." + ) + + +def handle_non_dse_job(tr, system, test_scenario, args): + runner = Runner(args.mode, system, test_scenario) + asyncio.run(runner.run()) + + logging.info(f"All test scenario results stored at: {runner.runner.output_path}") + + if args.mode == "run": + generator = ReportGenerator(runner.runner.output_path) + generator.generate_report(test_scenario) + logging.info( + "All test scenario execution attempts are complete. Please review" + f" the '{args.log_file}' file to confirm successful completion or to" + " identify any issues." + ) + + def handle_dry_run_and_run(args: argparse.Namespace) -> int: """ Execute the dry-run or run modes for CloudAI. @@ -133,28 +190,10 @@ def handle_dry_run_and_run(args: argparse.Namespace) -> int: logging.error("No test runs found in the test scenario.") return 1 - agent = GridSearchAgent(tr) - env = CloudAIGymEnv(test_run=tr, system=system, test_scenario=test_scenario) - - agent.configure(env.action_space) - - for dse_iteration, action in enumerate(agent.get_all_combinations(), start=1): - tr.dse_iteration = dse_iteration - for key, value in action.items(): - update_nested_attr(tr.test.test_definition.cmd_args, key, value) - runner = Runner(args.mode, system, test_scenario) - asyncio.run(runner.run()) - - logging.info(f"All test scenario results stored at: {runner.runner.output_path}") - - if args.mode == "run": - generator = ReportGenerator(runner.runner.output_path) - generator.generate_report(test_scenario) - logging.info( - "All test scenario execution attempts are complete. Please review" - f" the '{args.log_file}' file to confirm successful completion or to" - " identify any issues." - ) + if is_dse_job(tr.test.cmd_args): + handle_dse_job(tr, system, test_scenario, args) + else: + handle_non_dse_job(tr, system, test_scenario, args) return 0 diff --git a/tests/test_job_type_handler.py b/tests/test_job_type_handler.py new file mode 100644 index 00000000..1053ed14 --- /dev/null +++ b/tests/test_job_type_handler.py @@ -0,0 +1,39 @@ +from cloudai.cli.handlers import is_dse_job + +# Mock data for testing +mock_toml_dse = { + "test": { + "cmd_args": { + "docker_image_url": "https://docker/fake_url", + "load_container": True, + "FakeConfig": { + "policy": ["option1", "option2"], + "shape": "[1, 2, 3, 4]", + "dtype": "fake_type", + "mesh_shape": "[4, 3, 2, 1]", + }, + } + } +} + +mock_toml_non_dse = { + "test": { + "cmd_args": { + "docker_image_url": "https://docker/fake_url", + "load_container": True, + "FakeConfig": { + "policy": "option1", + "shape": "[1, 2, 3, 4]", + "dtype": "fake_type", + }, + } + } +} + + +def test_is_dse_job_dse(): + assert is_dse_job(mock_toml_dse["test"]["cmd_args"]) + + +def test_is_dse_job_non_dse(): + assert not is_dse_job(mock_toml_non_dse["test"]["cmd_args"]) From be56e31d70d23eafe5a7739bd5b617e0b8891bc7 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 19:07:06 -0800 Subject: [PATCH 13/21] add copyright headers --- tests/test_job_type_handler.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_job_type_handler.py b/tests/test_job_type_handler.py index 1053ed14..11c1fcc4 100644 --- a/tests/test_job_type_handler.py +++ b/tests/test_job_type_handler.py @@ -1,3 +1,19 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from cloudai.cli.handlers import is_dse_job # Mock data for testing From 71a12c48bee25adfaae95829ffbff9da48deb3f3 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 19:07:27 -0800 Subject: [PATCH 14/21] remove --- tests/test_job_type_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_job_type_handler.py b/tests/test_job_type_handler.py index 11c1fcc4..5e29d30b 100644 --- a/tests/test_job_type_handler.py +++ b/tests/test_job_type_handler.py @@ -16,7 +16,7 @@ from cloudai.cli.handlers import is_dse_job -# Mock data for testing + mock_toml_dse = { "test": { "cmd_args": { From 669bd8df4c5a659b2bacf8f47bb8bf1dfac9a704 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Sat, 11 Jan 2025 19:09:56 -0800 Subject: [PATCH 15/21] remove empty line --- tests/test_job_type_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_job_type_handler.py b/tests/test_job_type_handler.py index 5e29d30b..61eef1c0 100644 --- a/tests/test_job_type_handler.py +++ b/tests/test_job_type_handler.py @@ -16,7 +16,6 @@ from cloudai.cli.handlers import is_dse_job - mock_toml_dse = { "test": { "cmd_args": { From 02bb3e0d5ec57859bbd57cca462a7b4052f96662 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Mon, 13 Jan 2025 23:02:35 -0800 Subject: [PATCH 16/21] Removing the agent's configuration and instead query from the environment. --- .../_core/configurator/agents/base_agent.py | 42 ++----------------- .../_core/configurator/agents/grid_search.py | 10 +++-- 2 files changed, 9 insertions(+), 43 deletions(-) diff --git a/src/cloudai/_core/configurator/agents/base_agent.py b/src/cloudai/_core/configurator/agents/base_agent.py index cdb555ad..a62717c1 100644 --- a/src/cloudai/_core/configurator/agents/base_agent.py +++ b/src/cloudai/_core/configurator/agents/base_agent.py @@ -17,8 +17,6 @@ from abc import ABC, abstractmethod from typing import Any, Dict -from cloudai._core.test_scenario import TestRun - class BaseAgent(ABC): """ @@ -28,48 +26,14 @@ class BaseAgent(ABC): Automatically infers parameter types from TestRun's cmd_args. """ - def __init__(self, test_run: TestRun): + def __init__(self, action_space: Dict[str, Any]): """ Initialize the agent with the TestRun object. Args: - test_run (TestRun): The TestRun object containing cmd_args and test state. - """ - self.test_run = test_run - self.action_space = self.extract_action_space(test_run.test.cmd_args) - - def extract_action_space(self, cmd_args: Dict[str, Any]) -> Dict[str, Any]: - """ - Extract the action space from cmd_args by inferring parameter types. - - Args: - cmd_args (Dict[str, Any]): The command arguments from TestRun. - - Returns: - Dict[str, Any]: Action space defined with inferred parameter types. + action_space (Dict[str, Any]): The action space for the agent. """ - action_space = {} - - for key, value in cmd_args.items(): - self._process_value(action_space, key, value) - - return action_space - - def _process_value(self, action_space: Dict[str, Any], key: str, value: Any) -> None: - if isinstance(value, list): - self._process_list(action_space, key, value) - elif isinstance(value, dict): - for sub_key, sub_value in value.items(): - full_key = f"{key}.{sub_key}" - self._process_value(action_space, full_key, sub_value) - - def _process_list(self, action_space: Dict[str, Any], key: str, value: list) -> None: - if all(isinstance(v, int) for v in value): - action_space[key] = {"type": "int", "values": value} - elif all(isinstance(v, float) for v in value): - action_space[key] = {"type": "float", "values": value} - else: - action_space[key] = {"type": "categorical", "categories": value} + self.action_space = action_space @abstractmethod def configure(self, config: Dict[str, Any]) -> None: diff --git a/src/cloudai/_core/configurator/agents/grid_search.py b/src/cloudai/_core/configurator/agents/grid_search.py index 2ab1b36b..6775948d 100644 --- a/src/cloudai/_core/configurator/agents/grid_search.py +++ b/src/cloudai/_core/configurator/agents/grid_search.py @@ -18,7 +18,7 @@ from typing import Any, Dict, List from cloudai._core.configurator.agents.base_agent import BaseAgent -from cloudai._core.test_scenario import TestRun +from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv class GridSearchAgent(BaseAgent): @@ -28,14 +28,16 @@ class GridSearchAgent(BaseAgent): Iterates through all possible parameter combinations. """ - def __init__(self, test_run: TestRun): + def __init__(self, env: CloudAIGymEnv): """ Initialize the GridSearchAgent with the TestRun object. Args: - test_run (TestRun): The TestRun object containing cmd_args and test state. + env (CloudAIGymEnv): The environment instance to query the action space from. """ - super().__init__(test_run) + self.action_space = env.define_action_space() + super().__init__(self.action_space) + self.env = env self.action_combinations = [] self.index = 0 From 382e424c1aec849b42fa338092544cb2b7a7a09d Mon Sep 17 00:00:00 2001 From: itamar-rauch Date: Mon, 13 Jan 2025 15:43:06 +0200 Subject: [PATCH 17/21] Fixed typo in NeMoRunTestDefinition docstr (#336) --- src/cloudai/test_definitions/nemo_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cloudai/test_definitions/nemo_run.py b/src/cloudai/test_definitions/nemo_run.py index fa424672..b5db1a03 100644 --- a/src/cloudai/test_definitions/nemo_run.py +++ b/src/cloudai/test_definitions/nemo_run.py @@ -29,7 +29,7 @@ class NeMoRunCmdArgs(CmdArgs): class NeMoRunTestDefinition(TestDefinition): - """Test object for NeMoLauncher.""" + """Test object for NeMoRun.""" cmd_args: NeMoRunCmdArgs _docker_image: Optional[DockerImage] = None From e16873de7be223979f1c6502f4793fea57f63d72 Mon Sep 17 00:00:00 2001 From: Taekyung Heo Date: Mon, 13 Jan 2025 22:07:45 -0500 Subject: [PATCH 18/21] Mount NCCL_TOPO_FILE in NCCL test (#337) --- .../test_template/nccl_test/slurm_command_gen_strategy.py | 7 +++---- .../test_nccl_slurm_command_gen_strategy.py | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/cloudai/schema/test_template/nccl_test/slurm_command_gen_strategy.py b/src/cloudai/schema/test_template/nccl_test/slurm_command_gen_strategy.py index 3157c5d8..f26f214c 100644 --- a/src/cloudai/schema/test_template/nccl_test/slurm_command_gen_strategy.py +++ b/src/cloudai/schema/test_template/nccl_test/slurm_command_gen_strategy.py @@ -31,10 +31,9 @@ def _parse_slurm_args( base_args = super()._parse_slurm_args(job_name_prefix, env_vars, cmd_args, tr) container_mounts = "" - if "NCCL_TOPO_FILE" in env_vars and "DOCKER_NCCL_TOPO_FILE" in env_vars: - nccl_graph_path = Path(env_vars["NCCL_TOPO_FILE"]).resolve() - nccl_graph_file = env_vars["DOCKER_NCCL_TOPO_FILE"] - container_mounts = f"{nccl_graph_path}:{nccl_graph_file}" + if "NCCL_TOPO_FILE" in env_vars: + nccl_topo_file = Path(env_vars["NCCL_TOPO_FILE"]).resolve() + container_mounts = f"{nccl_topo_file}:{nccl_topo_file}" elif "NCCL_TOPO_FILE" in env_vars: del env_vars["NCCL_TOPO_FILE"] diff --git a/tests/slurm_command_gen_strategy/test_nccl_slurm_command_gen_strategy.py b/tests/slurm_command_gen_strategy/test_nccl_slurm_command_gen_strategy.py index c32ede71..33dd60c7 100644 --- a/tests/slurm_command_gen_strategy/test_nccl_slurm_command_gen_strategy.py +++ b/tests/slurm_command_gen_strategy/test_nccl_slurm_command_gen_strategy.py @@ -35,12 +35,12 @@ def cmd_gen_strategy(self, slurm_system: SlurmSystem) -> NcclTestSlurmCommandGen [ ( "nccl_test", - {"NCCL_TOPO_FILE": "/path/to/topo", "DOCKER_NCCL_TOPO_FILE": "/docker/topo"}, + {"NCCL_TOPO_FILE": "/path/to/topo"}, {"subtest_name": "all_reduce_perf", "docker_image_url": "fake_image_url"}, 2, ["node1", "node2"], { - "container_mounts": "/path/to/topo:/docker/topo", + "container_mounts": "/path/to/topo:/path/to/topo", }, ), ( @@ -50,7 +50,7 @@ def cmd_gen_strategy(self, slurm_system: SlurmSystem) -> NcclTestSlurmCommandGen 1, ["node1"], { - "container_mounts": "", + "container_mounts": "/path/to/topo:/path/to/topo", }, ), ], From fd3b6c90fcf34491737e5f1da938339be9873421 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Mon, 13 Jan 2025 23:09:23 -0800 Subject: [PATCH 19/21] Fix the testing code --- tests/test_agents.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/tests/test_agents.py b/tests/test_agents.py index 075664bd..71963040 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -13,39 +13,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from unittest.mock import MagicMock import pytest from cloudai._core.configurator.agents.grid_search import GridSearchAgent -from cloudai._core.test_scenario import TestRun +from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv @pytest.fixture -def mock_test_run(): +def mock_env(): """ - Fixture to provide a mock TestRun object for testing. + Fixture to provide a mock CloudAIGymEnv object for testing. """ - test_run = MagicMock(spec=TestRun) - test_run.test = MagicMock() - test_run.test.cmd_args = { - "docker_image_url": "https://docker/url", - "iters": [10, 100], - "maxbytes": [1024, 2048], - "minbytes": [512, 1024, 2048, 4096], - "ngpus": [4], - "subtest_name": "nccl_test", - "warmup_iters": 5, + env = MagicMock(spec=CloudAIGymEnv) + env.define_action_space.return_value = { + "iters": {"type": "int", "values": [10, 100]}, + "maxbytes": {"type": "int", "values": [1024, 2048]}, + "minbytes": {"type": "int", "values": [512, 1024, 2048, 4096]}, + "ngpus": {"type": "int", "values": [4]}, } - return test_run + return env -def test_grid_search_agent(mock_test_run): +def test_grid_search_agent(mock_env): """ Test the GridSearchAgent's ability to traverse the action space. """ - agent = GridSearchAgent(mock_test_run) + agent = GridSearchAgent(mock_env) agent.configure(config={}) combinations = agent.get_all_combinations() From 9fadfce6b3fe16fd8455d4dd54ff8b8f05eb138c Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Tue, 14 Jan 2025 09:55:05 -0800 Subject: [PATCH 20/21] fix the configurator structure --- src/cloudai/_core/configurator/{agents => }/base_agent.py | 0 src/cloudai/_core/configurator/{agents => }/grid_search.py | 2 +- src/cloudai/cli/handlers.py | 2 +- tests/test_agents.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/cloudai/_core/configurator/{agents => }/base_agent.py (100%) rename src/cloudai/_core/configurator/{agents => }/grid_search.py (98%) diff --git a/src/cloudai/_core/configurator/agents/base_agent.py b/src/cloudai/_core/configurator/base_agent.py similarity index 100% rename from src/cloudai/_core/configurator/agents/base_agent.py rename to src/cloudai/_core/configurator/base_agent.py diff --git a/src/cloudai/_core/configurator/agents/grid_search.py b/src/cloudai/_core/configurator/grid_search.py similarity index 98% rename from src/cloudai/_core/configurator/agents/grid_search.py rename to src/cloudai/_core/configurator/grid_search.py index 6775948d..8b47d00c 100644 --- a/src/cloudai/_core/configurator/agents/grid_search.py +++ b/src/cloudai/_core/configurator/grid_search.py @@ -17,7 +17,7 @@ import itertools from typing import Any, Dict, List -from cloudai._core.configurator.agents.base_agent import BaseAgent +from cloudai._core.configurator.base_agent import BaseAgent from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index baef723e..382cc410 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -22,8 +22,8 @@ from unittest.mock import Mock from cloudai import Installable, Parser, Registry, ReportGenerator, Runner, System -from cloudai._core.configurator.agents.grid_search import GridSearchAgent from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv +from cloudai._core.configurator.grid_search import GridSearchAgent from ..parser import HOOK_ROOT diff --git a/tests/test_agents.py b/tests/test_agents.py index 71963040..ce920a10 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -17,7 +17,7 @@ import pytest -from cloudai._core.configurator.agents.grid_search import GridSearchAgent +from cloudai._core.configurator.grid_search import GridSearchAgent from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv From 4cb3f0248c6142433a64d53d9d34f92208398d76 Mon Sep 17 00:00:00 2001 From: Srivatsan Krishnan Date: Tue, 14 Jan 2025 16:45:39 -0800 Subject: [PATCH 21/21] update the test_agent --- tests/test_agents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_agents.py b/tests/test_agents.py index ce920a10..2e13bc6e 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -17,8 +17,8 @@ import pytest -from cloudai._core.configurator.grid_search import GridSearchAgent from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv +from cloudai._core.configurator.grid_search import GridSearchAgent @pytest.fixture