NVIDIA · nvauto · Nov 27, 2024 · Nov 27, 2024
diff --git a/python/benchmark/benchmark/base.py b/python/benchmark/benchmark/base.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 import argparse
+import logging
 import pprint
 import subprocess
 from abc import abstractmethod
@@ -28,6 +29,16 @@
 
 from .utils import WithSparkSession, to_bool, with_benchmark
 
+# Disable mlflow autologging when mlflow is present in the environment
+# (e.g. Databricks), due to observed heavy resource usage during benchmarks.
+try:
+    import mlflow
+
+    logging.warning("***** Disabling mlflow autologging for benchmark runs *****")
+    mlflow.autolog(disable=True)
+except ImportError:
+    pass  # mlflow is not installed, so there is no autologging to disable
+
 
 class BenchmarkBase:
     """Based class for benchmarking.

diff --git a/python/benchmark/databricks/gpu_etl_cluster_spec.sh b/python/benchmark/databricks/gpu_etl_cluster_spec.sh
@@ -23,7 +23,7 @@ cat <<EOF
         "spark.task.cpus": "1",
         "spark.databricks.delta.preview.enabled": "true",
         "spark.python.worker.reuse": "true",
-        "spark.executorEnv.PYTHONPATH": "/databricks/jars/rapids-4-spark_2.12-24.08.1.jar:/databricks/spark/python",
+        "spark.executorEnv.PYTHONPATH": "/databricks/jars/rapids-4-spark_2.12-24.10.1.jar:/databricks/spark/python",
         "spark.sql.files.minPartitionNum": "2",
         "spark.sql.execution.arrow.maxRecordsPerBatch": "10000",
         "spark.executor.cores": "8",

diff --git a/python/benchmark/databricks/init-pip-cuda-11.8.sh b/python/benchmark/databricks/init-pip-cuda-11.8.sh
@@ -20,7 +20,7 @@ BENCHMARK_ZIP=/dbfs/path/to/benchmark.zip
 # also, in general, RAPIDS_VERSION (python) fields should omit any leading 0 in month/minor field (i.e. 23.8.0 and not 23.08.0)
 # while SPARK_RAPIDS_VERSION (jar) should have leading 0 in month/minor (e.g. 23.08.2 and not 23.8.2)
 RAPIDS_VERSION=24.10.0
-SPARK_RAPIDS_VERSION=24.08.1
+SPARK_RAPIDS_VERSION=24.10.1
 
 curl -L https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/${SPARK_RAPIDS_VERSION}/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}-cuda11.jar -o /databricks/jars/rapids-4-spark_2.12-${SPARK_RAPIDS_VERSION}.jar
 

diff --git a/python/run_benchmark.sh b/python/run_benchmark.sh
@@ -121,7 +121,7 @@ EOF
 
 if [[ $cluster_type == "gpu_etl" ]]
 then
-SPARK_RAPIDS_VERSION=24.08.1
+SPARK_RAPIDS_VERSION=24.10.1
 rapids_jar=${rapids_jar:-rapids-4-spark_2.12-$SPARK_RAPIDS_VERSION.jar}
 if [ ! -f $rapids_jar ]; then
     echo "downloading spark rapids jar"