diff --git a/src/cloudai/_core/base_installer.py b/src/cloudai/_core/base_installer.py index c00906a59..e2d88877b 100644 --- a/src/cloudai/_core/base_installer.py +++ b/src/cloudai/_core/base_installer.py @@ -122,10 +122,16 @@ def install(self, test_templates: Iterable[TestTemplate]) -> InstallStatusResult else: install_results[test_template.name] = result.message done += 1 - logging.info( - f"{done}/{total} Installation for {test_template.name} finished with status: " + msg = ( + f"{done}/{total} Installation of {test_template.name}: " f"{result.message if result.message else 'OK'}" ) + if result.success: + install_results[test_template.name] = "Success" + logging.info(msg) + else: + install_results[test_template.name] = result.message + logging.error(msg) except Exception as e: done += 1 logging.error(f"{done}/{total} Installation failed for {test_template.name}: {e}") diff --git a/src/cloudai/schema/test_template/jax_toolbox/slurm_install_strategy.py b/src/cloudai/schema/test_template/jax_toolbox/slurm_install_strategy.py index 29fd81c1d..257e8c2c6 100644 --- a/src/cloudai/schema/test_template/jax_toolbox/slurm_install_strategy.py +++ b/src/cloudai/schema/test_template/jax_toolbox/slurm_install_strategy.py @@ -33,9 +33,9 @@ def is_installed(self) -> InstallStatusResult: if docker_image_result.success: return InstallStatusResult(success=True) else: - if self.docker_image_cache_manager.cache_docker_images_locally: + if self.docker_image_cache_manager.system.cache_docker_images_locally: expected_docker_image_path = os.path.join( - self.docker_image_cache_manager.install_path, self.SUBDIR_PATH, self.DOCKER_IMAGE_FILENAME + self.docker_image_cache_manager.system.install_path, self.SUBDIR_PATH, self.DOCKER_IMAGE_FILENAME ) return InstallStatusResult( success=False, diff --git a/src/cloudai/schema/test_template/nccl_test/slurm_install_strategy.py b/src/cloudai/schema/test_template/nccl_test/slurm_install_strategy.py index e4706b488..f359048b6 100644 --- a/src/cloudai/schema/test_template/nccl_test/slurm_install_strategy.py +++ b/src/cloudai/schema/test_template/nccl_test/slurm_install_strategy.py @@ -39,9 +39,9 @@ def is_installed(self) -> InstallStatusResult: if docker_image_result.success: return InstallStatusResult(success=True) else: - if self.docker_image_cache_manager.cache_docker_images_locally: + if self.docker_image_cache_manager.system.cache_docker_images_locally: expected_docker_image_path = os.path.join( - self.docker_image_cache_manager.install_path, self.SUBDIR_PATH, self.DOCKER_IMAGE_FILENAME + self.docker_image_cache_manager.system.install_path, self.SUBDIR_PATH, self.DOCKER_IMAGE_FILENAME ) return InstallStatusResult( success=False, diff --git a/src/cloudai/schema/test_template/ucc_test/slurm_install_strategy.py b/src/cloudai/schema/test_template/ucc_test/slurm_install_strategy.py index a633848ab..81209b2db 100644 --- a/src/cloudai/schema/test_template/ucc_test/slurm_install_strategy.py +++ b/src/cloudai/schema/test_template/ucc_test/slurm_install_strategy.py @@ -39,9 +39,9 @@ def is_installed(self) -> InstallStatusResult: if docker_image_result.success: return InstallStatusResult(success=True) else: - if self.docker_image_cache_manager.cache_docker_images_locally: + if self.docker_image_cache_manager.system.cache_docker_images_locally: expected_docker_image_path = os.path.join( - self.docker_image_cache_manager.install_path, self.SUBDIR_PATH, self.DOCKER_IMAGE_FILENAME + self.docker_image_cache_manager.system.install_path, self.SUBDIR_PATH, self.DOCKER_IMAGE_FILENAME ) return InstallStatusResult( success=False, diff --git a/src/cloudai/systems/slurm/strategy/slurm_command_gen_strategy.py b/src/cloudai/systems/slurm/strategy/slurm_command_gen_strategy.py index 71b5854fd..f9b11db4c 100644 --- a/src/cloudai/systems/slurm/strategy/slurm_command_gen_strategy.py +++ b/src/cloudai/systems/slurm/strategy/slurm_command_gen_strategy.py @@ -46,11 +46,7 @@ def __init__(self, system: SlurmSystem, env_vars: Dict[str, Any], cmd_args: Dict self.install_path = self.slurm_system.install_path self.default_env_vars.update(self.slurm_system.global_env_vars) - self.docker_image_cache_manager = DockerImageCacheManager( - self.slurm_system.install_path, - self.slurm_system.cache_docker_images_locally, - self.slurm_system.default_partition, - ) + self.docker_image_cache_manager = DockerImageCacheManager(self.slurm_system) docker_image_url_info = self.cmd_args.get("docker_image_url") if docker_image_url_info is not None: self.docker_image_url = docker_image_url_info.get("default") diff --git a/src/cloudai/systems/slurm/strategy/slurm_install_strategy.py b/src/cloudai/systems/slurm/strategy/slurm_install_strategy.py index 6507ef0fb..04a50ec0f 100644 --- a/src/cloudai/systems/slurm/strategy/slurm_install_strategy.py +++ b/src/cloudai/systems/slurm/strategy/slurm_install_strategy.py @@ -41,11 +41,7 @@ def __init__( super().__init__(system, env_vars, cmd_args) self.slurm_system = cast(SlurmSystem, self.system) self.install_path = self.slurm_system.install_path - self.docker_image_cache_manager = DockerImageCacheManager( - self.slurm_system.install_path, - self.slurm_system.cache_docker_images_locally, - self.slurm_system.default_partition, - ) + self.docker_image_cache_manager = DockerImageCacheManager(self.slurm_system) docker_image_url_info = self.cmd_args.get("docker_image_url") if docker_image_url_info is not None: self.docker_image_url = docker_image_url_info.get("default") diff --git a/src/cloudai/util/docker_image_cache_manager.py b/src/cloudai/util/docker_image_cache_manager.py index 6a302b652..2ec50a140 100644 --- a/src/cloudai/util/docker_image_cache_manager.py +++ b/src/cloudai/util/docker_image_cache_manager.py @@ -18,7 +18,8 @@ import os import shutil import subprocess -import tempfile + +from cloudai.systems import SlurmSystem class PrerequisiteCheckResult: @@ -107,15 +108,11 @@ class DockerImageCacheManager: Manages the caching of Docker images for installation strategies. Attributes - install_path (str): The base installation path. - cache_docker_images_locally (bool): Whether to cache Docker image files locally. - partition_name (str): The partition name to use in the srun command. + system (SlurmSystem): The Slurm system configuration. """ - def __init__(self, install_path: str, cache_docker_images_locally: bool, partition_name: str) -> None: - self.install_path = install_path - self.cache_docker_images_locally = cache_docker_images_locally - self.partition_name = partition_name + def __init__(self, system: SlurmSystem) -> None: + self.system = system def ensure_docker_image( self, docker_image_url: str, subdir_name: str, docker_image_filename: str @@ -135,7 +132,7 @@ def ensure_docker_image( if image_check_result.success: return image_check_result - if self.cache_docker_images_locally: + if self.system.cache_docker_images_locally: return self.cache_docker_image(docker_image_url, subdir_name, docker_image_filename) return image_check_result @@ -155,13 +152,14 @@ def check_docker_image_exists( DockerImageCacheResult: Result of the Docker image existence check. """ logging.debug( - f"Checking if Docker image exists: docker_image_url={docker_image_url}, subdir_name={subdir_name}, " + f"Checking if Docker image exists: docker_image_url={docker_image_url}, " + f"subdir_name={subdir_name}, " f"docker_image_filename={docker_image_filename}, " - f"cache_docker_images_locally={self.cache_docker_images_locally}" + f"cache_docker_images_locally={self.system.cache_docker_images_locally}" ) # If not caching locally, return True. Defer checking URL accessibility to srun. - if not self.cache_docker_images_locally: + if not self.system.cache_docker_images_locally: return DockerImageCacheResult(True, docker_image_url, "") # Check if docker_image_url is a file path and exists @@ -171,12 +169,12 @@ def check_docker_image_exists( ) # Check if the cache file exists - if not os.path.exists(self.install_path): - message = f"Install path {self.install_path} does not exist." + if not os.path.exists(self.system.install_path): + message = f"Install path {self.system.install_path} does not exist." logging.debug(message) return DockerImageCacheResult(False, "", message) - subdir_path = os.path.join(self.install_path, subdir_name) + subdir_path = os.path.join(self.system.install_path, subdir_name) if not os.path.exists(subdir_path): message = f"Subdirectory path {subdir_path} does not exist." logging.debug(message) @@ -206,7 +204,7 @@ def cache_docker_image( Returns: DockerImageCacheResult: Result of the Docker image caching operation. """ - subdir_path = os.path.join(self.install_path, subdir_name) + subdir_path = os.path.join(self.system.install_path, subdir_name) docker_image_path = os.path.join(subdir_path, docker_image_filename) if os.path.isfile(docker_image_path): @@ -214,18 +212,18 @@ def cache_docker_image( logging.info(success_message) return DockerImageCacheResult(True, docker_image_path, success_message) - prerequisite_check = self._check_prerequisites(docker_image_url) + prerequisite_check = self._check_prerequisites() if not prerequisite_check: logging.error(f"Prerequisite check failed: {prerequisite_check.message}") return DockerImageCacheResult(False, "", prerequisite_check.message) - if not os.path.exists(self.install_path): - error_message = f"Install path {self.install_path} does not exist." + if not os.path.exists(self.system.install_path): + error_message = f"Install path {self.system.install_path} does not exist." logging.error(error_message) return DockerImageCacheResult(False, "", error_message) - if not os.access(self.install_path, os.W_OK): - error_message = f"No permission to write in install path {self.install_path}." + if not os.access(self.system.install_path, os.W_OK): + error_message = f"No permission to write in install path {self.system.install_path}." logging.error(error_message) return DockerImageCacheResult(False, "", error_message) @@ -237,10 +235,10 @@ def cache_docker_image( logging.error(error_message) return DockerImageCacheResult(False, "", error_message) - enroot_import_cmd = ( - f"srun --export=ALL --partition={self.partition_name} " - f"enroot import -o {docker_image_path} docker://{docker_image_url}" - ) + srun_prefix = f"srun --export=ALL --partition={self.system.default_partition}" + if self.system.account: + srun_prefix += f" --account={self.system.account}" + enroot_import_cmd = f"{srun_prefix} enroot import -o {docker_image_path} docker://{docker_image_url}" logging.debug(f"Importing Docker image: {enroot_import_cmd}") try: @@ -264,29 +262,17 @@ def cache_docker_image( error_message = ( f"Failed to import Docker image from {docker_image_url}. Command: {enroot_import_cmd}. Error: {e}" ) - logging.error(error_message) - return DockerImageCacheResult( - False, - "", - ( - f"Failed to import Docker image from {docker_image_url}. " - f"Command: {enroot_import_cmd}. " - f"Error: {e}. Please check the Docker image URL and ensure that it is accessible and set up with " - f"valid credentials." - ), - ) + logging.debug(error_message) + return DockerImageCacheResult(False, message=error_message) - def _check_prerequisites(self, docker_image_url: str) -> PrerequisiteCheckResult: + def _check_prerequisites(self) -> PrerequisiteCheckResult: """ Check prerequisites for caching Docker image. - Args: - docker_image_url (str): URL of the Docker image. - - Returns: + Returns PrerequisiteCheckResult: Result of the prerequisite check. """ - required_binaries = ["enroot", "srun"] + required_binaries = ["srun"] missing_binaries = [binary for binary in required_binaries if not shutil.which(binary)] if missing_binaries: @@ -296,82 +282,8 @@ def _check_prerequisites(self, docker_image_url: str) -> PrerequisiteCheckResult False, f"{missing_binaries_str} are required for caching Docker images but are not installed." ) - docker_accessible = self._check_docker_image_accessibility(docker_image_url) - if not docker_accessible.success: - logging.error(f"Docker image URL {docker_image_url} is not accessible. Error: {docker_accessible.message}") - return docker_accessible - return PrerequisiteCheckResult(True, "All prerequisites are met.") - def _check_docker_image_accessibility(self, docker_image_url: str) -> PrerequisiteCheckResult: - """ - Check if the Docker image URL is accessible. - - Args: - docker_image_url (str): URL of the Docker image. - - Returns: - PrerequisiteCheckResult: Result of the Docker image accessibility check. - """ - with tempfile.TemporaryDirectory() as temp_dir: - docker_image_path = os.path.join(temp_dir, "docker_image.sqsh") - enroot_import_cmd = f"enroot import -o {docker_image_path} docker://{docker_image_url}" - - logging.debug(f"Checking Docker image accessibility: {enroot_import_cmd}") - - process = subprocess.Popen(enroot_import_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - try: - while True: - error_output = process.stderr.readline() if process.stderr else None - error_output = error_output.decode() if error_output else "" - - if error_output: - if ( - "Downloading" in error_output - or "Found all layers in cache" in error_output - or "Fetching image manifest list" in error_output - ): - logging.debug( - f"Docker image URL, {docker_image_url}, is accessible. " - f"Command used: {enroot_import_cmd}. Found keyword: {error_output.strip()}" - ) - process.terminate() - return PrerequisiteCheckResult( - True, f"Docker image URL, {docker_image_url}, is accessible." - ) - if "[ERROR]" in error_output: - logging.error( - f"Failed to access Docker image URL, {docker_image_url}. " - f"Command used: {enroot_import_cmd}. Error: {error_output}" - ) - process.terminate() - if "401 Unauthorized" in error_output: - detailed_message = ( - f"Failed to access Docker image URL: {docker_image_url}. Error: {error_output}\n" - "This error indicates that access to the Docker image URL is unauthorized. " - "Please ensure you have the necessary permissions and have followed the " - "instructions in the README for setting up your credentials correctly." - ) - return PrerequisiteCheckResult(False, detailed_message) - return PrerequisiteCheckResult( - False, f"Failed to access Docker image URL: {docker_image_url}. Error: {error_output}" - ) - if process.poll() is not None: - break - - logging.debug(f"Failed to access Docker image URL: {docker_image_url}. Unknown error.") - return PrerequisiteCheckResult( - False, f"Failed to access Docker image URL: {docker_image_url}. Unknown error." - ) - finally: - # Ensure the temporary docker image file is removed - if os.path.exists(docker_image_path): - try: - os.remove(docker_image_path) - logging.debug(f"Temporary Docker image file removed: {docker_image_path}") - except OSError as e: - logging.error(f"Failed to remove temporary Docker image file {docker_image_path}. Error: {e}") - def uninstall_cached_image(self, subdir_name: str, docker_image_filename: str) -> DockerImageCacheResult: """ Uninstall the cached Docker image and remove the subdirectory if empty. @@ -387,7 +299,7 @@ def uninstall_cached_image(self, subdir_name: str, docker_image_filename: str) - if not result.success: return result - subdir_path = os.path.join(self.install_path, subdir_name) + subdir_path = os.path.join(self.system.install_path, subdir_name) if os.path.isdir(subdir_path): try: if not os.listdir(subdir_path): @@ -414,7 +326,7 @@ def remove_cached_image(self, subdir_name: str, docker_image_filename: str) -> D Returns: DockerImageCacheResult: Result of the removal operation. """ - docker_image_path = os.path.join(self.install_path, subdir_name, docker_image_filename) + docker_image_path = os.path.join(self.system.install_path, subdir_name, docker_image_filename) if os.path.isfile(docker_image_path): try: os.remove(docker_image_path) diff --git a/tests/test_docker_image_cache_manager.py b/tests/test_docker_image_cache_manager.py index 1e9a29f5c..377872983 100644 --- a/tests/test_docker_image_cache_manager.py +++ b/tests/test_docker_image_cache_manager.py @@ -15,8 +15,11 @@ # limitations under the License. import subprocess -from unittest.mock import MagicMock, patch +from pathlib import Path +from unittest.mock import patch +import pytest +from cloudai.systems.slurm.slurm_system import SlurmSystem from cloudai.util.docker_image_cache_manager import ( DockerImageCacheManager, DockerImageCacheResult, @@ -24,12 +27,26 @@ ) +@pytest.fixture +def slurm_system(tmp_path: Path) -> SlurmSystem: + system = SlurmSystem( + name="test_system", + install_path=str(tmp_path / "install"), + output_path=str(tmp_path / "output"), + cache_docker_images_locally=True, + default_partition="main", + partitions={}, + ) + (tmp_path / "install").mkdir() + return system + + @patch("os.path.isfile") @patch("os.path.exists") @patch("os.access") -def test_ensure_docker_image_file_exists(mock_access, mock_exists, mock_isfile): - manager = DockerImageCacheManager("/fake/install/path", True, "default") - mock_isfile.return_value = True +def test_ensure_docker_image_file_exists(mock_access, mock_exists, mock_is_file, slurm_system: SlurmSystem): + manager = DockerImageCacheManager(slurm_system) + mock_is_file.return_value = True mock_exists.return_value = True result = manager.ensure_docker_image("/tmp/existing_file.sqsh", "subdir", "docker_image.sqsh") assert result.success @@ -40,9 +57,9 @@ def test_ensure_docker_image_file_exists(mock_access, mock_exists, mock_isfile): @patch("os.path.isfile") @patch("os.path.exists") @patch("os.access") -def test_ensure_docker_image_url_cache_enabled(mock_access, mock_exists, mock_isfile): - manager = DockerImageCacheManager("/fake/install/path", True, "default") - mock_isfile.return_value = False +def test_ensure_docker_image_url_cache_enabled(mock_access, mock_exists, mock_is_file, slurm_system: SlurmSystem): + manager = DockerImageCacheManager(slurm_system) + mock_is_file.return_value = False mock_exists.return_value = True mock_access.return_value = True with patch.object( @@ -61,24 +78,26 @@ def test_ensure_docker_image_url_cache_enabled(mock_access, mock_exists, mock_is @patch("os.path.isfile") @patch("os.path.exists") @patch("os.access") -@patch("os.makedirs") @patch("subprocess.run") @patch("cloudai.util.docker_image_cache_manager.DockerImageCacheManager._check_prerequisites") -def test_cache_docker_image(mock_check_prerequisites, mock_run, mock_makedirs, mock_access, mock_exists, mock_isfile): - manager = DockerImageCacheManager("/fake/install/path", True, "default") +def test_cache_docker_image( + mock_check_prerequisites, mock_run, mock_access, mock_exists, mock_is_file, slurm_system: SlurmSystem +): + manager = DockerImageCacheManager(slurm_system) # Test when cached file already exists - mock_isfile.return_value = True + mock_is_file.return_value = True result = manager.cache_docker_image("docker.io/hello-world", "subdir", "image.tar.gz") assert result.success - assert result.message == "Cached Docker image already exists at /fake/install/path/subdir/image.tar.gz." + assert result.docker_image_path == f"{slurm_system.install_path}/subdir/image.tar.gz" + assert result.message == f"Cached Docker image already exists at {slurm_system.install_path}/subdir/image.tar.gz." # Test creating subdirectory when it doesn't exist - mock_isfile.return_value = False - mock_exists.side_effect = [True, False, False] # install_path exists, subdir_path does not + mock_is_file.return_value = False + mock_exists.side_effect = [True, False] # install_path exists, subdir_path does not with patch("os.makedirs") as mock_makedirs: result = manager.cache_docker_image("docker.io/hello-world", "subdir", "image.tar.gz") - mock_makedirs.assert_called_once_with("/fake/install/path/subdir") + mock_makedirs.assert_called_once_with(f"{manager.system.install_path}/subdir") # Ensure prerequisites are always met for the following tests mock_check_prerequisites.return_value = PrerequisiteCheckResult(True, "All prerequisites are met.") @@ -88,28 +107,31 @@ def test_cache_docker_image(mock_check_prerequisites, mock_run, mock_makedirs, m mock_exists.side_effect = None # Test caching success with subprocess command (removal of default partition keyword) - mock_isfile.return_value = False + mock_is_file.return_value = False mock_exists.side_effect = [True, True, True, True, True] # Ensure all path checks return True mock_run.return_value = subprocess.CompletedProcess(args=["cmd"], returncode=0, stderr="") result = manager.cache_docker_image("docker.io/hello-world", "subdir", "image.tar.gz") mock_run.assert_called_once_with( - "srun --export=ALL --partition=default enroot import -o /fake/install/path/subdir/image.tar.gz docker://docker.io/hello-world", + ( + f"srun --export=ALL --partition={manager.system.default_partition} enroot " + f"import -o {manager.system.install_path}/subdir/image.tar.gz docker://docker.io/hello-world" + ), shell=True, check=True, capture_output=True, text=True, ) assert result.success - assert result.message == "Docker image cached successfully at /fake/install/path/subdir/image.tar.gz." + assert result.message == f"Docker image cached successfully at {slurm_system.install_path}/subdir/image.tar.gz." # Test caching failure due to subprocess error - mock_isfile.return_value = False + mock_is_file.return_value = False mock_run.side_effect = subprocess.CalledProcessError(1, "cmd") result = manager.cache_docker_image("docker.io/hello-world", "subdir", "image.tar.gz") assert not result.success # Test caching failure due to disk-related errors - mock_isfile.return_value = False + mock_is_file.return_value = False mock_run.side_effect = None mock_run.return_value = subprocess.CompletedProcess(args=["cmd"], returncode=1, stderr="Disk quota exceeded\n") mock_exists.side_effect = [True, True, True, True, True] @@ -127,15 +149,18 @@ def test_cache_docker_image(mock_check_prerequisites, mock_run, mock_makedirs, m @patch("os.path.exists") @patch("os.access") @patch("os.remove") -def test_remove_cached_image(mock_remove, mock_access, mock_exists, mock_isfile): - manager = DockerImageCacheManager("/fake/install/path", True, "default") +def test_remove_cached_image(mock_remove, mock_access, mock_exists, mock_isfile, slurm_system: SlurmSystem): + manager = DockerImageCacheManager(slurm_system) # Test successful removal mock_isfile.return_value = True result = manager.remove_cached_image("subdir", "image.tar.gz") assert result.success - assert result.message == "Cached Docker image removed successfully from /fake/install/path/subdir/image.tar.gz." - mock_remove.assert_called_once_with("/fake/install/path/subdir/image.tar.gz") + assert ( + result.message + == f"Cached Docker image removed successfully from {manager.system.install_path}/subdir/image.tar.gz." + ) + mock_remove.assert_called_once_with(f"{manager.system.install_path}/subdir/image.tar.gz") # Test failed removal due to OSError mock_remove.side_effect = OSError("Mocked OSError") @@ -147,15 +172,18 @@ def test_remove_cached_image(mock_remove, mock_access, mock_exists, mock_isfile) mock_isfile.return_value = False result = manager.remove_cached_image("subdir", "image.tar.gz") assert result.success - assert result.message == "No cached Docker image found to remove at /fake/install/path/subdir/image.tar.gz." + assert ( + result.message + == f"No cached Docker image found to remove at {manager.system.install_path}/subdir/image.tar.gz." + ) @patch("os.path.isfile") @patch("os.path.exists") @patch("os.access") @patch("os.remove") -def test_uninstall_cached_image(mock_remove, mock_access, mock_exists, mock_isfile): - manager = DockerImageCacheManager("/fake/install/path", True, "default") +def test_uninstall_cached_image(mock_remove, mock_access, mock_exists, mock_isfile, slurm_system: SlurmSystem): + manager = DockerImageCacheManager(slurm_system) # Test successful uninstallation and subdirectory removal mock_isfile.return_value = True @@ -165,17 +193,17 @@ def test_uninstall_cached_image(mock_remove, mock_access, mock_exists, mock_isfi result = manager.uninstall_cached_image("subdir", "image.tar.gz") assert result.success assert result.message in [ - "Cached Docker image uninstalled successfully from /fake/install/path/subdir.", - "Subdirectory removed successfully: /fake/install/path/subdir.", + f"Cached Docker image uninstalled successfully from {manager.system.install_path}/subdir.", + f"Subdirectory removed successfully: {manager.system.install_path}/subdir.", ] - mock_remove.assert_called_once_with("/fake/install/path/subdir/image.tar.gz") + mock_remove.assert_called_once_with(f"{manager.system.install_path}/subdir/image.tar.gz") # Test successful uninstallation but subdirectory not empty mock_listdir.return_value = ["otherfile"] result = manager.uninstall_cached_image("subdir", "image.tar.gz") assert result.success - assert result.message == "Cached Docker image uninstalled successfully from /fake/install/path/subdir." - mock_remove.assert_called_with("/fake/install/path/subdir/image.tar.gz") + assert result.message == f"Cached Docker image uninstalled successfully from {manager.system.install_path}/subdir." + mock_remove.assert_called_with(f"{manager.system.install_path}/subdir/image.tar.gz") # Test failed removal due to OSError mock_remove.side_effect = OSError("Mocked OSError") @@ -188,149 +216,93 @@ def test_uninstall_cached_image(mock_remove, mock_access, mock_exists, mock_isfi mock_exists.return_value = False result = manager.uninstall_cached_image("subdir", "image.tar.gz") assert result.success - assert result.message == "Cached Docker image uninstalled successfully from /fake/install/path/subdir." + assert result.message == f"Cached Docker image uninstalled successfully from {manager.system.install_path}/subdir." # Cleanup patch.stopall() @patch("shutil.which") -@patch("cloudai.util.docker_image_cache_manager.DockerImageCacheManager._check_docker_image_accessibility") -def test_check_prerequisites(mock_check_docker_image_accessibility, mock_which): - manager = DockerImageCacheManager("/fake/install/path", True, "default") +def test_check_prerequisites(mock_which, slurm_system: SlurmSystem): + manager = DockerImageCacheManager(slurm_system) - # Ensure enroot and srun are installed - mock_which.side_effect = lambda x: x in ["enroot", "srun"] + # Ensure srun is installed + mock_which.side_effect = lambda x: x in ["srun"] # Test all prerequisites met - mock_check_docker_image_accessibility.return_value = PrerequisiteCheckResult( - True, "Docker image URL is accessible." - ) - result = manager._check_prerequisites("docker.io/hello-world") + result = manager._check_prerequisites() assert result.success assert result.message == "All prerequisites are met." - # Test enroot not installed - mock_which.side_effect = lambda x: x != "enroot" - result = manager._check_prerequisites("docker.io/hello-world") - assert not result.success - assert result.message == "enroot are required for caching Docker images but are not installed." - # Test srun not installed mock_which.side_effect = lambda x: x != "srun" - result = manager._check_prerequisites("docker.io/hello-world") + result = manager._check_prerequisites() assert not result.success assert result.message == "srun are required for caching Docker images but are not installed." - # Ensure enroot and srun are installed again - mock_which.side_effect = lambda x: x in ["enroot", "srun"] - - # Test Docker image URL not accessible - mock_check_docker_image_accessibility.return_value = PrerequisiteCheckResult( - False, "Docker image URL not accessible." - ) - result = manager._check_prerequisites("docker.io/hello-world") - assert not result.success - assert "Docker image URL not accessible." in result.message - - -@patch("subprocess.Popen") -@patch("shutil.which") -@patch("tempfile.TemporaryDirectory") -def test_check_docker_image_accessibility_with_enroot(mock_tempdir, mock_which, mock_popen): - manager = DockerImageCacheManager("/fake/install/path", True, "default") - - # Ensure docker binary is not available - mock_which.return_value = None - - # Mocking the TemporaryDirectory context manager - mock_temp_dir = MagicMock() - mock_tempdir.return_value.__enter__.return_value = mock_temp_dir - temp_dir_path = "/fake/temp/dir" - mock_temp_dir.__enter__.return_value = temp_dir_path - - # Mock Popen for enroot command with success scenario - process_mock = MagicMock() - process_mock.stderr.readline.side_effect = [ - b"Found all layers in cache\n", # Simulate successful output - b"", - b"", - ] - process_mock.poll.side_effect = [None, None, 0] # Simulate process running then finishing - - mock_popen.return_value = process_mock - - result = manager._check_docker_image_accessibility("docker.io/hello-world") - assert result.success - assert result.message == "Docker image URL, docker.io/hello-world, is accessible." - - # Verify the command contains the required keywords - command = mock_popen.call_args[0][0] - assert "enroot import -o" in command - assert "docker://docker.io/hello-world" in command - - # Mock Popen for enroot command with failure scenario - process_mock.reset_mock() - process_mock.stderr.readline.side_effect = [ - b"[ERROR] URL https://nvcr.io/proxy_auth returned error code: 401 Unauthorized\n", # Simulate 401 error output - b"", - b"", - ] - process_mock.poll.side_effect = [None, None, 0] # Simulate process running then finishing - - mock_popen.return_value = process_mock - - result = manager._check_docker_image_accessibility("docker.io/hello-world") - assert not result.success - assert ( - "Failed to access Docker image URL: docker.io/hello-world. " - "Error: [ERROR] URL https://nvcr.io/proxy_auth returned error code: 401 Unauthorized\n" - "This error indicates that access to the Docker image URL is unauthorized. " - "Please ensure you have the necessary permissions and have followed the instructions in the README " - "for setting up your credentials correctly." in result.message.replace("\n\n", "\n") - ) - - # Verify the command contains the required keywords - command = mock_popen.call_args[0][0] - assert "enroot import -o" in command - assert "docker://docker.io/hello-world" in command - @patch("os.path.isfile") @patch("os.path.exists") -def test_check_docker_image_exists_with_only_cached_file(mock_exists, mock_isfile): - manager = DockerImageCacheManager("/fake/install/path", True, "default") +def test_check_docker_image_exists_with_only_cached_file(mock_exists, mock_isfile, slurm_system: SlurmSystem): + manager = DockerImageCacheManager(slurm_system) # Simulate only the cached file being a valid file path - mock_isfile.side_effect = lambda path: path == "/fake/install/path/subdir/docker_image.sqsh" + mock_isfile.side_effect = lambda path: path == f"{manager.system.install_path}/subdir/docker_image.sqsh" mock_exists.side_effect = lambda path: path in [ - "/fake/install/path", - "/fake/install/path/subdir", - "/fake/install/path/subdir/docker_image.sqsh", + f"{manager.system.install_path}", + f"{manager.system.install_path}/subdir", + f"{manager.system.install_path}/subdir/docker_image.sqsh", ] result = manager.check_docker_image_exists("/tmp/non_existing_file.sqsh", "subdir", "docker_image.sqsh") assert result.success - assert result.docker_image_path == "/fake/install/path/subdir/docker_image.sqsh" - assert result.message == "Cached Docker image already exists at /fake/install/path/subdir/docker_image.sqsh." + assert result.docker_image_path == f"{manager.system.install_path}/subdir/docker_image.sqsh" + assert ( + result.message + == f"Cached Docker image already exists at {manager.system.install_path}/subdir/docker_image.sqsh." + ) @patch("os.path.isfile") @patch("os.path.exists") -def test_check_docker_image_exists_with_both_valid_files(mock_exists, mock_isfile): - manager = DockerImageCacheManager("/fake/install/path", True, "default") +def test_check_docker_image_exists_with_both_valid_files(mock_exists, mock_isfile, slurm_system: SlurmSystem): + manager = DockerImageCacheManager(slurm_system) # Simulate both cached file and docker image URL being valid file paths mock_isfile.side_effect = lambda path: path in [ "/tmp/existing_file.sqsh", - "/fake/install/path/subdir/docker_image.sqsh", + f"{manager.system.install_path}/subdir/docker_image.sqsh", ] mock_exists.side_effect = lambda path: path in [ "/tmp/existing_file.sqsh", - "/fake/install/path/subdir/docker_image.sqsh", + f"{manager.system.install_path}/subdir/docker_image.sqsh", ] result = manager.check_docker_image_exists("/tmp/existing_file.sqsh", "subdir", "docker_image.sqsh") assert result.success - assert result.docker_image_path == "/tmp/existing_file.sqsh" + assert result.docker_image_path is not None + assert str(result.docker_image_path) == "/tmp/existing_file.sqsh" assert result.message == "Docker image file path is valid: /tmp/existing_file.sqsh." + + +def test_system_with_account(slurm_system: SlurmSystem): + slurm_system.account = "test_account" + Path(slurm_system.install_path).mkdir(parents=True, exist_ok=True) + + manager = DockerImageCacheManager(slurm_system) + manager._check_prerequisites = lambda: PrerequisiteCheckResult(True, "All prerequisites are met.") + + with patch("subprocess.run") as mock_run: + res = manager.cache_docker_image("docker.io/hello-world", "subdir", "docker_image.sqsh") + assert res.success + + mock_run.assert_called_once_with( + ( + f"srun --export=ALL --partition={slurm_system.default_partition} --account={slurm_system.account} " + f"enroot import -o {slurm_system.install_path}/subdir/docker_image.sqsh docker://docker.io/hello-world" + ), + shell=True, + check=True, + capture_output=True, + text=True, + ) diff --git a/tests/test_slurm_install_strategy.py b/tests/test_slurm_install_strategy.py index 35d26637d..241888d1e 100644 --- a/tests/test_slurm_install_strategy.py +++ b/tests/test_slurm_install_strategy.py @@ -37,6 +37,7 @@ def slurm_system(tmp_path: Path) -> SlurmSystem: install_path=str(tmp_path / "install"), output_path=str(tmp_path / "output"), default_partition="main", + cache_docker_images_locally=True, partitions={ "main": [ SlurmNode(name="node1", partition="main", state=SlurmNodeState.IDLE), @@ -66,8 +67,7 @@ def test_install_path_attribute(slurm_install_strategy: SlurmInstallStrategy, sl @pytest.fixture def mock_docker_image_cache_manager(slurm_system: SlurmSystem): mock = MagicMock() - mock.cache_docker_images_locally = True - mock.install_path = slurm_system.install_path + mock.system = slurm_system mock.check_docker_image_exists.return_value = InstallStatusResult(success=False, message="Docker image not found") mock.ensure_docker_image.return_value = InstallStatusResult(success=True) mock.uninstall_cached_image.return_value = InstallStatusResult(success=True) @@ -94,7 +94,7 @@ def test_is_installed_locally(self, strategy: NcclTestSlurmInstallStrategy): ) def test_is_installed_remote(self, strategy: NcclTestSlurmInstallStrategy): - strategy.docker_image_cache_manager.cache_docker_images_locally = False + strategy.docker_image_cache_manager.system.cache_docker_images_locally = False result = strategy.is_installed() @@ -222,7 +222,7 @@ def test_is_installed_locally(self, strategy: UCCTestSlurmInstallStrategy): ) def test_is_installed_remote(self, strategy: UCCTestSlurmInstallStrategy): - strategy.docker_image_cache_manager.cache_docker_images_locally = False + strategy.docker_image_cache_manager.system.cache_docker_images_locally = False result = strategy.is_installed()