diff --git a/ci/Dockerfile b/ci/Dockerfile index 517bc773..43cc1f5c 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -38,5 +38,5 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86 # install cuML ARG CUML_VER=24.12 -RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$CUML_VER cuvs=$CUML_VER python=3.10 cuda-version=11.8 numpy~=1.0 \ +RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$CUML_VER cuvs=$CUML_VER python=3.10 cuda-version=11.8 numpy~=1.0 \ && conda clean --all -f -y diff --git a/docker/Dockerfile.pip b/docker/Dockerfile.pip index 19146ab0..4d5c65bc 100644 --- a/docker/Dockerfile.pip +++ b/docker/Dockerfile.pip @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ ARG CUDA_VERSION=11.8.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 ARG PYSPARK_VERSION=3.3.1 -ARG RAPIDS_VERSION=24.10.0 +ARG RAPIDS_VERSION=24.12.0 ARG ARCH=amd64 #ARG ARCH=arm64 # Install packages to build spark-rapids-ml diff --git a/docker/Dockerfile.python b/docker/Dockerfile.python index b304cd38..6e00dd9a 100644 --- a/docker/Dockerfile.python +++ b/docker/Dockerfile.python @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ ARG CUDA_VERSION=11.8.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 -ARG CUML_VERSION=24.10 +ARG CUML_VERSION=24.12 # Install packages to build spark-rapids-ml RUN apt update -y \ diff --git a/notebooks/aws-emr/init-bootstrap-action.sh b/notebooks/aws-emr/init-bootstrap-action.sh index 096b4d94..16def3b4 100755 --- a/notebooks/aws-emr/init-bootstrap-action.sh +++ b/notebooks/aws-emr/init-bootstrap-action.sh @@ -27,7 +27,7 @@ sudo bash -c "wget https://www.python.org/ftp/python/3.10.9/Python-3.10.9.tgz && tar xzf Python-3.10.9.tgz && cd Python-3.10.9 && \ ./configure --enable-optimizations && make altinstall" -RAPIDS_VERSION=24.10.0 +RAPIDS_VERSION=24.12.0 sudo /usr/local/bin/pip3.10 install --upgrade pip diff --git a/notebooks/databricks/README.md b/notebooks/databricks/README.md index 1d60a204..7b5cadf6 100644 --- a/notebooks/databricks/README.md +++ b/notebooks/databricks/README.md @@ -51,7 +51,7 @@ If you already have a Databricks account, you can run the example notebooks on a spark.task.resource.gpu.amount 1 spark.databricks.delta.preview.enabled true spark.python.worker.reuse true - spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-24.08.1.jar:/databricks/spark/python + spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-24.10.1.jar:/databricks/spark/python spark.sql.execution.arrow.maxRecordsPerBatch 100000 spark.rapids.memory.gpu.minAllocFraction 0.0001 spark.plugins com.nvidia.spark.SQLPlugin diff --git a/notebooks/databricks/init-pip-cuda-11.8.sh b/notebooks/databricks/init-pip-cuda-11.8.sh index 72ee582f..d3a7b9ed 100644 --- a/notebooks/databricks/init-pip-cuda-11.8.sh +++ b/notebooks/databricks/init-pip-cuda-11.8.sh @@ -18,8 +18,8 @@ SPARK_RAPIDS_ML_ZIP=/dbfs/path/to/zip/file # IMPORTANT: specify RAPIDS_VERSION fully 23.10.0 and not 23.10 # also in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0) # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2) -RAPIDS_VERSION=24.10.0 -SPARK_RAPIDS_VERSION=24.08.1 +RAPIDS_VERSION=24.12.0 +SPARK_RAPIDS_VERSION=24.10.1 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar diff --git a/notebooks/dataproc/README.md b/notebooks/dataproc/README.md index 5051e581..55317eda 100644 --- a/notebooks/dataproc/README.md +++ b/notebooks/dataproc/README.md @@ -29,7 +29,7 @@ If you already have a Dataproc account, you can run the example notebooks on a D - Create a cluster with at least two single-gpu workers. **Note**: in addition to the initialization script from above, this also uses the standard [initialization actions](https://github.com/GoogleCloudDataproc/initialization-actions) for installing the GPU drivers and RAPIDS: ``` export CUDA_VERSION=11.8 - export RAPIDS_VERSION=24.10.0 + export RAPIDS_VERSION=24.12.0 gcloud dataproc clusters create $USER-spark-rapids-ml \ --image-version=2.1-ubuntu \ diff --git a/notebooks/dataproc/spark_rapids_ml.sh b/notebooks/dataproc/spark_rapids_ml.sh index 2810cee9..3ff07286 100644 --- a/notebooks/dataproc/spark_rapids_ml.sh +++ b/notebooks/dataproc/spark_rapids_ml.sh @@ -14,7 +14,7 @@ # limitations under the License. -RAPIDS_VERSION=24.10.0 +RAPIDS_VERSION=24.12.0 # patch existing packages mamba install "llvmlite<0.40,>=0.39.0dev0" "numba>=0.56.2" diff --git a/python/README.md b/python/README.md index b2e495f8..71f6c422 100644 --- a/python/README.md +++ b/python/README.md @@ -8,9 +8,9 @@ For simplicity, the following instructions just use Spark local mode, assuming a First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html). Example for CUDA Toolkit 11.8: ```bash -conda create -n rapids-24.10 \ +conda create -n rapids-24.12 \ -c rapidsai -c conda-forge -c nvidia \ - cuml=24.10 cuvs=24.10 python=3.10 cuda-version=11.8 numpy~=1.0 + cuml=24.12 cuvs=24.12 python=3.10 cuda-version=11.8 numpy~=1.0 ``` **Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting. Once you have a working environment, you can then try installing directly, if necessary. @@ -19,7 +19,7 @@ conda create -n rapids-24.10 \ Once you have the conda environment, activate it and install the required packages. ```bash -conda activate rapids-24.10 +conda activate rapids-24.12 ## for development access to notebooks, tests, and benchmarks git clone --branch main https://github.com/NVIDIA/spark-rapids-ml.git diff --git a/python/benchmark/databricks/init-pip-cuda-11.8.sh b/python/benchmark/databricks/init-pip-cuda-11.8.sh index 606618f8..e74fa213 100644 --- a/python/benchmark/databricks/init-pip-cuda-11.8.sh +++ b/python/benchmark/databricks/init-pip-cuda-11.8.sh @@ -19,7 +19,7 @@ BENCHMARK_ZIP=/dbfs/path/to/benchmark.zip # IMPORTANT: specify rapids fully 23.10.0 and not 23.10 # also, in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0) # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2) -RAPIDS_VERSION=24.10.0 +RAPIDS_VERSION=24.12.0 SPARK_RAPIDS_VERSION=24.10.1 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar diff --git a/python/benchmark/dataproc/init_benchmark.sh b/python/benchmark/dataproc/init_benchmark.sh index fc61cda1..27ece2d9 100755 --- a/python/benchmark/dataproc/init_benchmark.sh +++ b/python/benchmark/dataproc/init_benchmark.sh @@ -22,7 +22,7 @@ function get_metadata_attribute() { /usr/share/google/get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}" } -RAPIDS_VERSION=$(get_metadata_attribute rapids-version 24.10.0) +RAPIDS_VERSION=$(get_metadata_attribute rapids-version 24.12.0) # patch existing packages mamba install "llvmlite<0.40,>=0.39.0dev0" "numba>=0.56.2"