diff --git a/.github/workflows/dev-docker.yml b/.github/workflows/dev-docker.yml
index d830f0ab..cc948395 100644
--- a/.github/workflows/dev-docker.yml
+++ b/.github/workflows/dev-docker.yml
@@ -5,6 +5,8 @@ env:
   DEFAULT_IMAGE_INCREMENT: 0
   DEFAULT_SERVER_REVISION: main
   DEFAULT_PYTHON_VERSIONS: 3.8 3.9 3.10 3.11 3.12 3.13
+  DEFAULT_KHIOPS_GCS_DRIVER_REVISION: 0.0.11
+  DEFAULT_KHIOPS_S3_DRIVER_REVISION: 0.0.13
 on:
   pull_request:
     paths: [packaging/docker/khiopspydev/Dockerfile.*, .github/workflows/dev-docker.yml]
@@ -34,6 +36,14 @@ on:
         type: string
         default: main
         description: Khiops Server Revision
+      khiops-gcs-driver-revision:
+        type: string
+        default: 0.0.11
+        description: Driver version for Google Cloud Storage remote files
+      khiops-s3-driver-revision:
+        type: string
+        default: 0.0.13
+        description: Driver version for AWS-S3 remote files
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true
@@ -55,6 +65,8 @@ jobs:
           echo "KHIOPSDEV_OS_CODENAME=$(echo '${{ matrix.khiopsdev-os }}' | tr -d '0-9.')" >> "$GITHUB_ENV"
           echo "SERVER_REVISION=${{ inputs.server-revision || env.DEFAULT_SERVER_REVISION }}" >> "$GITHUB_ENV"
           echo "IMAGE_URL=ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.khiopsdev-os }}" >> "$GITHUB_ENV"
+          echo "KHIOPS_GCS_DRIVER_REVISION=${{ inputs.khiops-gcs-driver-revision || env.DEFAULT_KHIOPS_GCS_DRIVER_REVISION }}" >> "$GITHUB_ENV"
+          echo "KHIOPS_S3_DRIVER_REVISION=${{ inputs.khiops-s3-driver-revision || env.DEFAULT_KHIOPS_S3_DRIVER_REVISION }}" >> "$GITHUB_ENV"
       - name: Checkout khiops-python sources
         uses: actions/checkout@v4
       - name: Set up Docker Buildx
@@ -81,6 +93,9 @@
       - name: Build image and push it to GitHub Container Registry
         uses: docker/build-push-action@v5
         with:
+          # Special hostname used by the integration tests for remote file access
+          # added using inputs because /etc/hosts is read-only for alternate builders (buildx via moby buildkit)
+          add-hosts: s3-bucket.localhost:127.0.0.1
           context: ./packaging/docker/khiopspydev/
           file: ./packaging/docker/khiopspydev/Dockerfile.${{ env.KHIOPSDEV_OS_CODENAME }}
           build-args: |
@@ -88,6 +103,8 @@
             "KHIOPSDEV_OS=${{ matrix.khiopsdev-os }}"
             "SERVER_REVISION=${{ env.SERVER_REVISION }}"
             "PYTHON_VERSIONS=${{ inputs.python-versions || env.DEFAULT_PYTHON_VERSIONS }}"
+            "KHIOPS_GCS_DRIVER_REVISION=${{ env.KHIOPS_GCS_DRIVER_REVISION }}"
+            "KHIOPS_S3_DRIVER_REVISION=${{ env.KHIOPS_S3_DRIVER_REVISION }}"
           tags: ${{ env.DOCKER_IMAGE_TAGS }}
           # Push only on manual request
           push: ${{ inputs.push || false }}
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/tests.yml
similarity index 85%
rename from .github/workflows/unit-tests.yml
rename to .github/workflows/tests.yml
index 0fae6d0c..f17fa23b 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,5 +1,5 @@
 ---
-name: Unit Tests
+name: Tests
 env:
   DEFAULT_SAMPLES_REVISION: 10.2.4
   DEFAULT_KHIOPS_DESKTOP_REVISION: 10.2.4
@@ -43,7 +43,7 @@ jobs:
       # because the `env` context is only accessible at the step level;
       # hence, it is hard-coded
       image: |-
-        ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || 'latest' }}
+        ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || '10.2.4.0.s3-gcs-remote-files' }}
       credentials:
         username: ${{ github.actor }}
         password: ${{ secrets.GITHUB_TOKEN }}
@@ -112,7 +112,32 @@ jobs:
       - name: Prepare Unit Tests Environment
         if: github.ref != 'dev' && github.ref != 'main' && ! inputs.run-long-tests
         run: echo "UNITTEST_ONLY_SHORT_TESTS=true" >> "$GITHUB_ENV"
-      - name: Run Unit Tests
+      - name: Prepare Integration Tests on remote files
+        env:
+          AWS_ENDPOINT_URL: http://localhost:4569
+        shell: bash
+        run: |
+          # Prepare AWS-S3 credentials and configuration
+          mkdir -p ${GITHUB_WORKSPACE}/.aws/
+          cat << EOF > ${GITHUB_WORKSPACE}/.aws/credentials
+          [default]
+          aws_access_key_id=KEY
+          aws_secret_access_key=SECRET
+          EOF
+          cat << EOF > ${GITHUB_WORKSPACE}/.aws/configuration
+          [default]
+          endpoint_url=${AWS_ENDPOINT_URL}
+          region=eu-north-1
+          EOF
+          echo "Generated AWS credentials..."
+          cat ${GITHUB_WORKSPACE}/.aws/credentials
+          echo "Generated AWS configuration..."
+          cat ${GITHUB_WORKSPACE}/.aws/configuration
+
+          # Prepare GCS credentials
+          touch ${GITHUB_WORKSPACE}/google-credentials.json
+          /scripts/run_fake_remote_file_servers.sh . # launch the servers in the background
+      - name: Run Unit & Integration Tests
         env:
           KHIOPS_SAMPLES_DIR: ${{ github.workspace }}/khiops-samples
           KHIOPS_DOCKER_RUNNER_URL: https://localhost:11000
@@ -124,6 +149,24 @@
           rmaps_base_oversubscribe: true
           # Oversubscribe for MPI > 4.x
           OMPI_MCA_rmaps_base_oversubscribe: true
+          # for the tests with GCS
+          GCS_BUCKET_NAME: gcs-bucket
+          # we take advantage of the built-in `STORAGE_EMULATOR_HOST` env variable
+          # that every GCS client can read and lets us use a local fake file server
+          STORAGE_EMULATOR_HOST: http://localhost:4443
+          # the C++ SDK recognizes the `CLOUD_STORAGE_EMULATOR_ENDPOINT` env variable
+          # instead of `STORAGE_EMULATOR_HOST`
+          CLOUD_STORAGE_EMULATOR_ENDPOINT: http://localhost:4443
+          # even in GCS emulation mode, the credentials file must exist
+          GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/google-credentials.json
+          GCS_DRIVER_LOGLEVEL: info # set to debug for diagnosis
+          S3_DRIVER_LOGLEVEL: info # set to debug for diagnosis
+          # for the tests with S3
+          S3_BUCKET_NAME: s3-bucket
+          AWS_SHARED_CREDENTIALS_FILE: ${{ github.workspace }}/.aws/credentials
+          AWS_CONFIG_FILE: ${{ github.workspace }}/.aws/configuration
+          # common var for tests with GCS & S3
+          no_proxy: localhost
         run: |
           # This is needed so that the Git tag is parsed and the khiops-python
           # version is retrieved
@@ -138,10 +181,10 @@
             $CONDA run --no-capture-output -n "$CONDA_ENV" coverage report -m
             $CONDA run --no-capture-output -n "$CONDA_ENV" coverage xml -o "reports/$CONDA_ENV/py-coverage.xml"
           done
-      - name: Display Unit Test Reports
+      - name: Display Test Reports
         uses: dorny/test-reporter@v1
         with:
-          name: Unit Tests ${{ matrix.python-version }}
+          name: Run Tests ${{ matrix.python-version }}
           path: >-
             reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml,
             reports/py${{ matrix.python-version }}_conda/TEST-tests.*.*.xml
diff --git a/khiops/core/internals/filesystems.py b/khiops/core/internals/filesystems.py
index 16af9f30..ac3ca923 100644
--- a/khiops/core/internals/filesystems.py
+++ b/khiops/core/internals/filesystems.py
@@ -23,6 +23,7 @@
 try:
     import boto3
     import boto3.session
+    from boto3.exceptions import S3UploadFailedError
     from botocore.exceptions import ClientError
 
     boto3_import_error = None
@@ -254,7 +255,7 @@ def copy_from_local(uri_or_path, local_path):
     Raises
     ------
     RuntimeError
-        If there was a problem when removing.
+        If there was a problem when copying.
     """
     create_resource(uri_or_path).copy_from_local(local_path)
 
@@ -272,7 +273,7 @@ def copy_to_local(uri_or_path, local_path):
     Raises
     ------
     RuntimeError
-        If there was a problem when removing.
+        If there was a problem when copying.
""" create_resource(uri_or_path).copy_to_local(local_path) @@ -668,29 +669,22 @@ def remove(self): ) def copy_from_local(self, local_path): - response = self.s3_client.Bucket(self.uri_info.netloc).upload_file( - local_path, self.uri_info.path[1:] - ) - status_code = response["ResponseMetadata"]["HTTPStatusCode"] - copy_ok = 200 <= status_code <= 299 - if not copy_ok: - raise RuntimeError( - f"S3 copy_from_local failed {self.uri} with code {status_code}: " - + json.dumps(response) + try: + self.s3_client.Bucket(self.uri_info.netloc).upload_file( + local_path, self.uri_info.path[1:] ) + # normalize the raised exception + except S3UploadFailedError as exc: + raise RuntimeError(f"S3 copy_from_local failed {self.uri}") from exc def copy_to_local(self, local_path): - response = self.s3_client.Bucket(self.uri_info.netloc).download_file( - local_path, self.uri_info.path[1:] - ) - status_code = response["ResponseMetadata"]["HTTPStatusCode"] - copy_ok = 200 <= status_code <= 299 - if not copy_ok: - raise RuntimeError( - f"S3 download failed {self.uri} with code {status_code}: " - + json.dumps(response) + try: + self.s3_client.Bucket(self.uri_info.netloc).download_file( + self.uri_info.path[1:], local_path ) - return copy_ok + # normalize the raised exception + except S3UploadFailedError as exc: + raise RuntimeError(f"S3 download failed {self.uri}") from exc def list_dir(self): # Add an extra slash to the path to treat it as a folder diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index 912b4d17..d2c1f4ea 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -594,17 +594,26 @@ def _report_exit_status( # Create the message reporting the errors and warnings error_msg = "" - errors, fatal_errors, warning_messages = self._collect_errors(log_file_path) - if warning_messages: - error_msg += "Warnings in log:\n" + "".join(warning_messages) - if errors: - if error_msg: - error_msg += "\n" - error_msg += "Errors in log:\n" + "".join(errors) - if fatal_errors: - if error_msg: - error_msg += "\n" - error_msg += "Fatal errors in log:\n" + "".join(fatal_errors) + # If the log file exists: Collect the errors and warnings messages + if fs.exists(log_file_path): + errors, fatal_errors, warning_messages = self._collect_errors(log_file_path) + if warning_messages: + error_msg += "Warnings in log:\n" + "".join(warning_messages) + if errors: + if error_msg: + error_msg += "\n" + error_msg += "Errors in log:\n" + "".join(errors) + if fatal_errors: + if error_msg: + error_msg += "\n" + error_msg += "Fatal errors in log:\n" + "".join(fatal_errors) + # Otherwise warn that the log file is missing + else: + warnings.warn( + f"Log file not found after {tool_name} execution." 
+ f"Path: {log_file_path}" + ) + errors = fatal_errors = [] # Add stdout to the warning message if non empty if stdout: diff --git a/packaging/conda/meta.yaml b/packaging/conda/meta.yaml index 35e2249e..5007f5ba 100644 --- a/packaging/conda/meta.yaml +++ b/packaging/conda/meta.yaml @@ -30,8 +30,12 @@ requirements: - pandas >=0.25.3 - scikit-learn >=0.22.2 run_constrained: - - boto3 >=1.17.39 + # do not necessary use the latest version + # to avoid undesired breaking changes + - boto3 >=1.17.39,<=1.35.69 - google-cloud-storage >=1.37.0 + # an open issue on boto3 (https://github.com/boto/boto3/issues/3585) forces a min version of pyopenssl + - pyopenssl>=24.0.0,<25.0.0 outputs: - name: {{ metadata.get('name') }} diff --git a/packaging/docker/khiopspydev/Dockerfile.ubuntu b/packaging/docker/khiopspydev/Dockerfile.ubuntu index e9ed25bb..5e7d1573 100644 --- a/packaging/docker/khiopspydev/Dockerfile.ubuntu +++ b/packaging/docker/khiopspydev/Dockerfile.ubuntu @@ -10,14 +10,15 @@ ARG KHIOPS_REVISION RUN true \ # Install git (for khiops-python version calculation) and pip \ && apt-get -y update \ - && apt-get -y --no-install-recommends install git python3-pip zip pandoc wget \ + && apt-get -y --no-install-recommends install git python3-pip zip pandoc wget ruby-dev \ # Get Linux distribution codename \ && if [ -f /etc/os-release ]; then . /etc/os-release; fi \ # Obtain the Khiops native package \ && KHIOPS_PKG_FILE=$KHIOPS_REVISION/khiops-core-openmpi_$KHIOPS_REVISION-1-$VERSION_CODENAME.amd64.deb \ && wget -O KHIOPS_CORE.deb "https://github.com/KhiopsML/khiops/releases/download/${KHIOPS_PKG_FILE}" \ - # Install the Khiops native package \ - && dpkg -i --force-all KHIOPS_CORE.deb \ + # Install the Khiops native package : make it always succeed. \ + # If dpkg fails it is due to missing dependencies which will be installed by apt in the next line \ + && (dpkg -i --force-all KHIOPS_CORE.deb || true) \ && apt-get -f -y install \ && rm -f KHIOPS_CORE.deb \ # Set python to python3 \ @@ -39,6 +40,8 @@ RUN true \ # set up all the supported Python environments under conda (for the unit tests) # relying on a variable containing all the versions ARG PYTHON_VERSIONS +ARG KHIOPS_GCS_DRIVER_REVISION +ARG KHIOPS_S3_DRIVER_REVISION RUN true \ && export CONDA="/root/miniforge3/bin/conda" \ && /bin/bash -c 'for version in ${PYTHON_VERSIONS}; \ @@ -48,15 +51,53 @@ RUN true \ do \ $CONDA create -y -n $CONDA_ENV python=${version}; \ done; \ + # khiops core \ $CONDA install -y -n py${version}_conda -c khiops-dev khiops-core=$(echo ${KHIOPS_REVISION} | tr -d "-") ; \ + # remote files drivers installed in the conda environment \ + $CONDA install -y -n py${version}_conda -c khiops \ + khiops-driver-s3=${KHIOPS_S3_DRIVER_REVISION} \ + khiops-driver-gcs=${KHIOPS_GCS_DRIVER_REVISION}; \ done' \ && true RUN mkdir -p /scripts -COPY ./run_service.sh /scripts/run_service.sh -RUN chmod +x /scripts/run_service.sh && \ +COPY ./run_service.sh ./run_fake_remote_file_servers.sh /scripts/ +RUN chmod +x /scripts/run_service.sh /scripts/run_fake_remote_file_servers.sh && \ useradd -rm -d /home/ubuntu -s /bin/bash -g root -u 1000 ubuntu +# remote files drivers installed system-wide +RUN true \ + # Get Linux distribution codename \ + && if [ -f /etc/os-release ]; then . 
+    && wget -O khiops-gcs.deb https://github.com/KhiopsML/khiopsdriver-gcs/releases/download/${KHIOPS_GCS_DRIVER_REVISION}/khiops-driver-gcs_${KHIOPS_GCS_DRIVER_REVISION}-1-${VERSION_CODENAME}.amd64.deb \
+    && wget -O khiops-s3.deb https://github.com/KhiopsML/khiopsdriver-s3/releases/download/${KHIOPS_S3_DRIVER_REVISION}/khiops-driver-s3_${KHIOPS_S3_DRIVER_REVISION}-1-${VERSION_CODENAME}.amd64.deb \
+    && (dpkg -i --force-all khiops-gcs.deb khiops-s3.deb || true) \
+    && apt-get -f -y install \
+    && rm -f khiops-gcs.deb khiops-s3.deb \
+    && true
+
 FROM ghcr.io/khiopsml/khiops-server:${SERVER_REVISION} AS server
+FROM fsouza/fake-gcs-server:1.50 AS gcs-server
+
 FROM khiopsdev AS base
 COPY --from=server /service /usr/bin/service
+
+# GCS fake file server (only in the ubuntu container)
+COPY --from=gcs-server /bin/fake-gcs-server /bin/fake-gcs-server
+
+# Port on which gcs-server is listening
+EXPOSE 4443
+
+# S3 fake file server (only in the ubuntu container)
+# Do not use the latest fakes3 version because starting from 1.3 a licence is required.
+# If fakes3 is no longer compatible, think about switching to an alternative, fully compatible server
+# (https://github.com/jamhall/s3rver v3.7.1 is not yet fully compatible, for example)
+RUN gem install fakes3:1.2.1 sorted_set
+# The fake s3-bucket.localhost hostname used by the tests must resolve to localhost.
+# Alternate builders (buildx via moby buildkit) mount /etc/hosts read-only, so the following command would fail:
+#   echo "127.0.0.1 s3-bucket.localhost" >> /etc/hosts
+# You will have to add the `add-hosts` input instead (https://github.com/docker/build-push-action/#inputs)
+
+# Port on which fakes3 is listening
+EXPOSE 4569
+
diff --git a/packaging/docker/khiopspydev/run_fake_remote_file_servers.sh b/packaging/docker/khiopspydev/run_fake_remote_file_servers.sh
new file mode 100644
index 00000000..cb65d402
--- /dev/null
+++ b/packaging/docker/khiopspydev/run_fake_remote_file_servers.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+ROOT_FOLDER=${1:-.} # defaults to the current folder
+
+# File server for GCS (runs in the background)
+# WARNING: there are 3 major features activated by the options ...
+# -data: exposes pre-provisioned files (feature not currently used): the direct child folder will be the bucket name
+# -filesystem-root: lets us upload and read new files remotely at the same location as the source
+# -public-host: must expose localhost (https://github.com/fsouza/fake-gcs-server/issues/201)
+echo "Launching fake-gcs-server in background..."
+nohup /bin/fake-gcs-server \
+    -data "${ROOT_FOLDER}"/tests/resources/remote-access \
+    -filesystem-root "${ROOT_FOLDER}"/tests/resources/remote-access \
+    -scheme http \
+    -public-host localhost > /dev/null < /dev/null 2>&1 & # needs to redirect all 3 fds to free the TTY
+
+# File server for S3 (runs in the background)
+# WARNING:
+# -r: exposes pre-provisioned files (feature not currently used): the direct child folder will be the bucket name
+#     these files were uploaded once because fake-s3 creates metadata
+echo "Launching fakes3 in background..."
+PORT_NUMBER=${AWS_ENDPOINT_URL##*:}
+nohup /usr/local/bin/fakes3 \
+    -r "${ROOT_FOLDER}"/tests/resources/remote-access \
+    -p "${PORT_NUMBER}" > /dev/null < /dev/null 2>&1 & # needs to redirect all 3 fds to free the TTY
diff --git a/setup.py b/setup.py
index 85ab8f89..a82cfc12 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,14 @@
     ],
     cmdclass=versioneer.get_cmdclass(),
     extras_require={
-        "s3": ["boto3>=1.17.39"],
+        "s3": [
+            # do not necessarily use the latest version
+            # to avoid undesired breaking changes
+            "boto3>=1.17.39,<=1.35.69",
+            # an open issue on boto3 (https://github.com/boto/boto3/issues/3585)
+            # forces a minimal version of pyopenssl
+            "pyopenssl>=24.0.0,<25.0.0",
+        ],
         "gcs": ["google-cloud-storage>=1.37.0"],
     },
 )
diff --git a/tests/resources/remote-access/gcs-bucket/GCS_BUCKET_MUST_EXIST b/tests/resources/remote-access/gcs-bucket/GCS_BUCKET_MUST_EXIST
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_khiops_integrations.py b/tests/test_khiops_integrations.py
index 2dda1ccb..519513d1 100644
--- a/tests/test_khiops_integrations.py
+++ b/tests/test_khiops_integrations.py
@@ -177,6 +177,7 @@ def test_runner_with_conda_based_environment(self):
         # directory
         # - check that the Khiops binary directory contains the MODL* binaries
         #   and `mpiexec` (which should be its default location)
+        conda_prefix = None
         if "CONDA_PREFIX" in os.environ:
             # Remove `CONDA_PREFIX/bin` from `PATH`
             conda_prefix_bin = os.path.join(os.environ["CONDA_PREFIX"], "bin")
@@ -186,12 +187,23 @@ def test_runner_with_conda_based_environment(self):
                 if path_fragment != conda_prefix_bin
             )
 
+            # Store the existing CONDA_PREFIX
+            conda_prefix = os.environ["CONDA_PREFIX"]
+
             # Unset `CONDA_PREFIX`
             del os.environ["CONDA_PREFIX"]
 
         # Create a fresh local runner
         runner = KhiopsLocalRunner()
 
+        # Restore CONDA_PREFIX
+        if conda_prefix is not None:
+            os.environ["CONDA_PREFIX"] = conda_prefix
+
+            # Restore `CONDA_PREFIX/bin` into `PATH`
+            conda_prefix_bin = os.path.join(conda_prefix, "bin")
+            os.environ["PATH"] = os.pathsep.join([conda_prefix_bin, os.environ["PATH"]])
+
         # Check that MODL* files as set in the runner exist and are executable
         self.assertTrue(os.path.isfile(runner.khiops_path))
         self.assertTrue(os.access(runner.khiops_path, os.X_OK))
diff --git a/tests/test_remote_access.py b/tests/test_remote_access.py
index 3d8f2d59..5c10b2d0 100644
--- a/tests/test_remote_access.py
+++ b/tests/test_remote_access.py
@@ -21,12 +21,22 @@
 import khiops.core as kh
 import khiops.core.internals.filesystems as fs
+from khiops.core.internals.runner import KhiopsLocalRunner
 from khiops.extras.docker import KhiopsDockerRunner
 from khiops.sklearn import KhiopsClassifier, KhiopsCoclustering
 from tests.test_helper import KhiopsTestHelper
 
 
 def s3_config_exists():
+    # Note:
+    # Instead of config files,
+    # the S3 configuration may also be set with alternative environment variables:
+    # - `AWS_ACCESS_KEY_ID`
+    # - `AWS_SECRET_ACCESS_KEY`
+    # - `AWS_ENDPOINT_URL`
+    # - `S3_BUCKET_NAME`
+    # However, storing the credentials in config files is more secure,
+    # because these can be protected by access policies.
     return (
         "AWS_SHARED_CREDENTIALS_FILE" in os.environ
         and "AWS_CONFIG_FILE" in os.environ
@@ -35,10 +45,17 @@ def s3_config_exists():
 
 
 def gcs_config_exists():
-    return (
-        "GOOGLE_APPLICATION_CREDENTIALS" in os.environ
-        and "GCS_BUCKET_NAME" in os.environ
-    )
+    # Note:
+    # We have to temporarily disable the GCS tests
+    # because the new generation of the GCS driver in C++
+    # cannot yet handle the emulation mode well
+    # https://github.com/KhiopsML/khiopsdriver-gcs/issues/28
+    return False
+    # Reactivate this check when the issue is fixed
+    # return (
+    #     "GOOGLE_APPLICATION_CREDENTIALS" in os.environ
+    #     and "GCS_BUCKET_NAME" in os.environ
+    # )
 
 
 def docker_runner_config_exists():
@@ -54,6 +71,31 @@ class KhiopsRemoteAccessTestsContainer:
     class KhiopsRemoteAccessTests(unittest.TestCase, KhiopsTestHelper):
         """Generic class to test remote filesystems and Khiops runners"""
 
+        @classmethod
+        def init_remote_bucket(cls, bucket_name=None, proto=None):
+            # create the remote root_temp_dir
+            remote_resource = fs.create_resource(
+                f"{proto}://{bucket_name}/khiops-cicd/tmp"
+            )
+            remote_resource.make_dir()
+
+            # copy each sample file to the remote /samples folder
+            for file in (
+                "Iris/Iris.txt",
+                "Iris/Iris.kdic",
+                "SpliceJunction/SpliceJunction.txt",
+                "SpliceJunction/SpliceJunctionDNA.txt",
+                "SpliceJunction/SpliceJunction.kdic",
+            ):
+                fs.copy_from_local(
+                    f"{proto}://{bucket_name}/khiops-cicd/samples/{file}",
+                    os.path.join(kh.get_samples_dir(), file),
+                )
+                # symmetric call to ensure the upload was OK
+                fs.copy_to_local(
+                    f"{proto}://{bucket_name}/khiops-cicd/samples/{file}", "/tmp/dummy"
+                )
+
         def results_dir_root(self):
             """To be overridden by descendants if needed
 
@@ -69,6 +111,10 @@ def remote_access_test_case(self):
             """To be overridden by descendants"""
             return ""
 
+        def should_skip_in_a_conda_env(self):
+            """To be overridden by descendants"""
+            return True
+
         def print_test_title(self):
             print(f"\n Remote System: {self.remote_access_test_case()}")
 
@@ -79,17 +125,58 @@ def skip_if_no_config(self):
                 "has no configuration available"
             )
 
+        @staticmethod
+        def is_in_a_conda_env():
+            """Detects whether this is run from a Conda environment
+
+            The way to find out is to check whether khiops-core is installed
+            in the same environment as the currently running Conda one
+            """
+
+            if not isinstance(kh.get_runner(), KhiopsLocalRunner):
+                return False
+
+            # Get the path to the Khiops executable
+            khiops_path = kh.get_runner()._khiops_path
+
+            # If $(dirname khiops_path) is identical to $CONDA_PREFIX/bin,
+            # then return True
+            conda_prefix = os.environ.get("CONDA_PREFIX")
+            return conda_prefix is not None and os.path.join(
+                conda_prefix, "bin"
+            ) == os.path.dirname(khiops_path)
+
         def setUp(self):
             self.skip_if_no_config()
+            if self.is_in_a_conda_env() and self.should_skip_in_a_conda_env():
+                self.skipTest(
+                    f"Remote test case {self.remote_access_test_case()} "
+                    "in a conda environment is currently skipped"
+                )
             self.print_test_title()
 
+        def tearDown(self):
+            # Cleanup the output dir (the files within and the folder itself)
+            if hasattr(self, "folder_name_to_clean_in_teardown"):
+                for filename in fs.list_dir(self.folder_name_to_clean_in_teardown):
+                    fs.remove(
+                        fs.get_child_path(
+                            self.folder_name_to_clean_in_teardown, filename
+                        )
+                    )
+                fs.remove(self.folder_name_to_clean_in_teardown)
+
         def test_train_predictor_with_remote_access(self):
             """Test train_predictor with remote resources"""
             iris_data_dir = fs.get_child_path(kh.get_runner().samples_dir, "Iris")
-            output_dir = fs.get_child_path(
+            # ask for folder cleaning during tearDown
+            self.folder_name_to_clean_in_teardown = output_dir = fs.get_child_path(
                 self.results_dir_root(),
-                f"test_{self.remote_access_test_case()}_remote_files",
+                f"test_{self.remote_access_test_case()}_remote_files_{uuid.uuid4()}",
             )
+
+            # When using `kh`, the log file will be, by default,
+            # in the runner `root_temp_dir` folder, which can be remote
             kh.train_predictor(
                 fs.get_child_path(iris_data_dir, "Iris.kdic"),
                 dictionary_name="Iris",
@@ -100,18 +187,21 @@ def test_train_predictor_with_remote_access(self):
                 trace=True,
             )
 
-            # Check the existents of the trining files
+            # Check the existence of the training files
             self.assertTrue(fs.exists(fs.get_child_path(output_dir, "AllReports.khj")))
             self.assertTrue(fs.exists(fs.get_child_path(output_dir, "Modeling.kdic")))
 
-            # Cleanup
-            for filename in fs.list_dir(output_dir):
-                fs.remove(fs.get_child_path(output_dir, filename))
-
         def test_khiops_classifier_with_remote_access(self):
             """Test the training of a khiops_classifier with remote resources"""
+
             # Setup paths
-            output_dir = (
+            # note: the current implementation forces the khiops.log file
+            # to be created in the output_dir (thus local)
+            # (any attempt to override it as an arg
+            # for the fit method will be ignored)
+
+            # ask for folder cleaning during tearDown
+            self.folder_name_to_clean_in_teardown = output_dir = (
                 self._khiops_temp_dir + f"/KhiopsClassifier_output_dir_{uuid.uuid4()}/"
             )
             iris_data_dir = fs.get_child_path(kh.get_runner().samples_dir, "Iris")
@@ -135,17 +225,19 @@ def test_khiops_classifier_with_remote_access(self):
             predict_path = fs.get_child_path(output_dir, "predict.txt")
             self.assertTrue(fs.exists(predict_path), msg=f"Path: {predict_path}")
 
-            # Cleanup
-            for filename in fs.list_dir(output_dir):
-                fs.remove(fs.get_child_path(output_dir, filename))
-
         def test_khiops_coclustering_with_remote_access(self):
             """Test the training of a khiops_coclustering with remote resources"""
             # Skip if only short tests are run
             KhiopsTestHelper.skip_long_test(self)
 
             # Setup paths
-            output_dir = (
+            # note: the current implementation forces the khiops.log file
+            # to be created in the output_dir (thus local)
+            # (any attempt to override it as an arg
+            # for the fit method will be ignored)
+
+            # ask for folder cleaning during tearDown
+            self.folder_name_to_clean_in_teardown = output_dir = (
                 self._khiops_temp_dir + f"/KhiopsCoclustering_output_dir_{uuid.uuid4()}/"
             )
@@ -177,6 +269,14 @@ def test_train_predictor_fail_and_log_with_remote_access(self):
             log_file_path = fs.get_child_path(
                 self._khiops_temp_dir, f"khiops_log_{uuid.uuid4()}.log"
             )
+
+            # no cleaning required, as an exception will be raised
+            # without any result produced
+            output_dir = fs.get_child_path(
+                self.results_dir_root(),
+                f"test_{self.remote_access_test_case()}_remote_files_{uuid.uuid4()}",
+            )
+
             iris_data_dir = fs.get_child_path(kh.get_runner().samples_dir, "Iris")
             with self.assertRaises(kh.KhiopsRuntimeError):
                 kh.train_predictor(
@@ -184,10 +284,7 @@ def test_train_predictor_fail_and_log_with_remote_access(self):
                     dictionary_name="Iris",
                     data_table_path=fs.get_child_path(iris_data_dir, "Iris.txt"),
                     target_variable="Class",
-                    results_dir=fs.get_child_path(
-                        self.results_dir_root(),
-                        f"test_{self.remote_access_test_case()}_remote_files",
-                    ),
+                    results_dir=output_dir,
                     log_file_path=log_file_path,
                 )
             # Check and remove log file
@@ -204,9 +301,19 @@ def setUpClass(cls):
         if s3_config_exists():
             runner = kh.get_runner()
             bucket_name = os.environ["S3_BUCKET_NAME"]
-            runner.samples_dir = f"s3://{bucket_name}/project/khiops-cicd/samples"
cls._khiops_temp_dir = f"s3://{bucket_name}/project/khiops-cicd/tmp" - runner.root_temp_dir = f"s3://{bucket_name}/project/khiops-cicd/tmp" + + cls.init_remote_bucket(bucket_name=bucket_name, proto="s3") + + runner.samples_dir = f"s3://{bucket_name}/khiops-cicd/samples" + resources_directory = KhiopsTestHelper.get_resources_dir() + + # WARNING : khiops temp files cannot be remote + cls._khiops_temp_dir = f"{resources_directory}/tmp/khiops-cicd" + + # root_temp_dir + # (where the log file is saved by default when using `kh`) + # can be remote + runner.root_temp_dir = f"s3://{bucket_name}/khiops-cicd/tmp" @classmethod def tearDownClass(cls): @@ -214,6 +321,11 @@ def tearDownClass(cls): if s3_config_exists(): kh.get_runner().__init__() + def should_skip_in_a_conda_env(self): + # The S3 driver is now released for conda too. + # No need to skip the tests any longer in a conda environment + return False + def config_exists(self): return s3_config_exists() @@ -232,8 +344,18 @@ def setUpClass(cls): if gcs_config_exists(): runner = kh.get_runner() bucket_name = os.environ["GCS_BUCKET_NAME"] + + cls.init_remote_bucket(bucket_name=bucket_name, proto="gs") + runner.samples_dir = f"gs://{bucket_name}/khiops-cicd/samples" - cls._khiops_temp_dir = f"gs://{bucket_name}/khiops-cicd/tmp" + resources_directory = KhiopsTestHelper.get_resources_dir() + + # WARNING : khiops temp files cannot be remote + cls._khiops_temp_dir = f"{resources_directory}/tmp/khiops-cicd" + + # root_temp_dir + # (where the log file is saved by default when using `kh`) + # can be remote runner.root_temp_dir = f"gs://{bucket_name}/khiops-cicd/tmp" @classmethod @@ -242,6 +364,11 @@ def tearDownClass(cls): if gcs_config_exists(): kh.get_runner().__init__() + def should_skip_in_a_conda_env(self): + # The GCS driver is now released for conda too. + # No need to skip the tests any longer in a conda environment + return False + def config_exists(self): return gcs_config_exists() @@ -341,6 +468,11 @@ def tearDownClass(cls): def config_exists(self): return docker_runner_config_exists() + def should_skip_in_a_conda_env(self): + # Tests using a docker runner should never be skipped + # even in a conda environment + return False + def remote_access_test_case(self): return "KhiopsDockerRunner"