NVIDIA · srivatsankrishnan · Dec 19, 2024 · Jan 6, 2025 · Jan 8, 2025 · Jan 8, 2025
diff --git a/conf/common/test/dse_jaxtoolbox_grok.toml b/conf/common/test/dse_jaxtoolbox_grok.toml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name = "dse_jaxtoolbox_grok"  # referenced as test_name by the dse_jaxtoolbox test scenario
+description = "DSE JaxToolbox Grok"
+test_template_name = "JaxToolboxGrok"
+
+[cmd_args]
+docker_image_url = "https://docker/url"  # NOTE(review): looks like a placeholder URL — confirm
+  [cmd_args.fdl]
+  num_gpus = [1, 8, 16]  # list of ints: BaseAgent would register an "int" dimension (presumably "fdl.num_gpus")
+  checkpoint_policy = ["save_iteration_input", "save_none"]  # list of strings: a "categorical" dimension
+  num_groups = "16"  # scalar (non-list) values are skipped by BaseAgent when it builds the action space
+  use_fp8 = "1"
+  use_repeated_layer = "True"
+
+[extra_env_vars]
+"ENABLE_TE" = "0"
+"NVTE_FUSED_ATTN" = "1"
+"COMBINE_THRESHOLD" = "301989888"
+"XLA_PYTHON_CLIENT_MEM_FRACTION" = "0.9"
diff --git a/conf/common/test_scenario/dse_jaxtoolbox.toml b/conf/common/test_scenario/dse_jaxtoolbox.toml
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name = "dse_jaxtoolbox_grok"
+
+[[Tests]]
+id = "Tests.1"
+test_name = "dse_jaxtoolbox_grok"  # same string as `name` in conf/common/test/dse_jaxtoolbox_grok.toml
+num_nodes = "1"  # NOTE(review): that test lists num_gpus up to 16 — confirm one node is intended
diff --git a/src/cloudai/_core/configurator/agents/base_agent.py b/src/cloudai/_core/configurator/agents/base_agent.py
@@ -0,0 +1,102 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict
+
+from cloudai._core.test_scenario import TestRun
+
+
+class BaseAgent(ABC):
+    """
+    Base class for all agents in the CloudAI framework.
+
+    Provides a unified interface and parameter management for action spaces.
+    Automatically infers parameter types from TestRun's cmd_args.
+    """
+
+    def __init__(self, test_run: TestRun):
+        """
+        Initialize the agent with the TestRun object.
+
+        Args:
+            test_run (TestRun): The TestRun object containing cmd_args and test state.
+        """
+        self.test_run = test_run
+        self.action_space = self.extract_action_space(test_run.test.cmd_args)
+
+    def extract_action_space(self, cmd_args: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Extract the action space from cmd_args by inferring parameter types.
+
+        Args:
+            cmd_args (Dict[str, Any]): The command arguments from TestRun.
+
+        Returns:
+            Dict[str, Any]: Action space defined with inferred parameter types.
+        """
+        action_space = {}
+
+        for key, value in cmd_args.items():
+            self._process_value(action_space, key, value)
+
+        return action_space
+
+    def _process_value(self, action_space: Dict[str, Any], key: str, value: Any) -> None:
+        if isinstance(value, list):
+            self._process_list(action_space, key, value)
+        elif isinstance(value, dict):
+            for sub_key, sub_value in value.items():
+                full_key = f"{key}.{sub_key}"
+                self._process_value(action_space, full_key, sub_value)
+
+    def _process_list(self, action_space: Dict[str, Any], key: str, value: list) -> None:
+        if all(isinstance(v, int) for v in value):
+            action_space[key] = {"type": "int", "values": value}
+        elif all(isinstance(v, float) for v in value):
+            action_space[key] = {"type": "float", "values": value}
+        else:
+            action_space[key] = {"type": "categorical", "categories": value}
+
+    @abstractmethod
+    def configure(self, config: Dict[str, Any]) -> None:
+        """
+        Configure the agent with additional settings.
+
+        Args:
+            config (Dict[str, Any]): Configuration settings for the agent.
+        """
+        pass
+
+    @abstractmethod
+    def select_action(self) -> Dict[str, Any]:
+        """
+        Select an action from the action space.
+
+        Returns:
+            Dict[str, Any]: A dictionary mapping action keys to selected values.
+        """
+        pass
+
+    @abstractmethod
+    def update_policy(self, _feedback: Dict[str, Any]) -> None:
+        """
+        Update the agent state based on feedback from the environment.
+
+        Args:
+            feedback (Dict[str, Any]): Feedback information from the environment.
+        """
+        pass
diff --git a/src/cloudai/_core/configurator/agents/grid_search.py b/src/cloudai/_core/configurator/agents/grid_search.py
@@ -0,0 +1,91 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+from typing import Any, Dict, List
+
+from cloudai._core.configurator.agents.base_agent import BaseAgent
+from cloudai._core.test_scenario import TestRun
+
+
+class GridSearchAgent(BaseAgent):
+    """
+    Agent implementing a grid search over the action space.
+
+    Iterates through all possible parameter combinations.
+    """
+
+    def __init__(self, test_run: TestRun):
+        """
+        Initialize the GridSearchAgent with the TestRun object.
+
+        Args:
+            test_run (TestRun): The TestRun object containing cmd_args and test state.
+        """
+        super().__init__(test_run)
+        self.action_combinations = []
+        self.index = 0
+
+    def configure(self, config: Dict[str, Any]) -> None:
+        """
+        Configure the grid search by precomputing all parameter combinations.
+
+        Args:
+            config (Dict[str, Any]): Additional configuration settings (optional).
+        """
+        parameter_values = []
+        for _key, param in self.action_space.items():
+            if param["type"] == "int" or param["type"] == "float":
+                parameter_values.append(param["values"])
+            elif param["type"] == "categorical":
+                parameter_values.append(param["categories"])
+
+        self.action_combinations = list(itertools.product(*parameter_values))
+        self.index = 0
+
+    def get_all_combinations(self) -> List[Dict[str, Any]]:
+        """
+        Get all possible combinations of the action space parameters.
+
+        Returns:
+            List[Dict[str, Any]]: A list of dictionaries, each representing a unique combination of parameters.
+        """
+        keys = list(self.action_space.keys())
+        return [dict(zip(keys, combination)) for combination in self.action_combinations]
+
+    def select_action(self) -> Dict[str, Any]:
+        """
+        Select the next action from the grid.
+
+        Returns:
+            Dict[str, Any]: A dictionary mapping action keys to selected values.
+        """
+        if self.index >= len(self.action_combinations):
+            raise StopIteration("Grid search completed.")
+
+        action = dict(zip(self.action_space.keys(), self.action_combinations[self.index]))
+        self.index += 1
+        return action
+
+    def update_policy(self, _feedback: Dict[str, Any]) -> None:
+        """
+        Update the agent based on feedback (not used in grid search).
+
+        Args:
+            feedback (Dict[str, Any]): Feedback information from the environment.
+        """
+        # Grid search is stateless and does not rely on feedback.
+        pass
diff --git a/src/cloudai/_core/configurator/base_gym.py b/src/cloudai/_core/configurator/base_gym.py
@@ -0,0 +1,102 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional, Tuple
+
+
+class BaseGym(ABC):
+    """Base class for CloudAI Gym environments."""
+
+    def __init__(self):
+        """Initialize the CloudAIGym environment."""
+        self.action_space = self.define_action_space()
+        self.observation_space = self.define_observation_space()
+
+    @abstractmethod
+    def define_action_space(self) -> Dict[str, Any]:
+        """
+        Define the action space for the environment.
+
+        Returns:
+            Dict[str, Any]: The action space.
+        """
+        pass
+
+    @abstractmethod
+    def define_observation_space(self) -> list:
+        """
+        Define the observation space for the environment.
+
+        Returns:
+            list: The observation space.
+        """
+        pass
+
+    @abstractmethod
+    def reset(
+        self, seed: Optional[int] = None, _options: Optional[dict[str, Any]] = None
+    ) -> Tuple[list, dict[str, Any]]:
+        """
+        Reset the environment.
+
+        Args:
+            seed (Optional[int]): Seed for the environment's random number generator.
+            options (Optional[dict]): Additional options for reset.
+
+        Returns:
+            Tuple: A tuple containing:
+                - observation (list): Initial observation.
+                - info (dict): Additional info for debugging.
+        """
+        pass
+
+    @abstractmethod
+    def step(self, action: Any) -> Tuple[list, float, bool, dict]:
+        """
+        Execute one step in the environment.
+
+        Args:
+            action (Any): Action chosen by the agent.
+
+        Returns:
+            Tuple: A tuple containing:
+                - observation (list): Updated system state.
+                - reward (float): Reward for the action taken.
+                - done (bool): Whether the episode is done.
+                - info (dict): Additional info for debugging.
+        """
+        pass
+
+    @abstractmethod
+    def render(self, mode: str = "human"):
+        """
+        Render the current state of the environment.
+
+        Args:
+            mode (str): The mode to render with. Default is "human".
+        """
+        pass
+
+    @abstractmethod
+    def seed(self, seed: Optional[int] = None):
+        """
+        Set the seed for the environment's random number generator.
+
+        Args:
+            seed (Optional[int]): Seed for the environment's random number generator.
+        """
+        pass