diff --git a/python/benchmark/benchmark/base.py b/python/benchmark/benchmark/base.py index 9af91252..9b774034 100644 --- a/python/benchmark/benchmark/base.py +++ b/python/benchmark/benchmark/base.py @@ -14,6 +14,7 @@ # limitations under the License. # import argparse +import logging import pprint import subprocess from abc import abstractmethod @@ -28,6 +29,16 @@ from .utils import WithSparkSession, to_bool, with_benchmark +# disable mlflow autologging if in the environment (e.g. Databricks) +# due to observed heavy resource usage +logging.warning("***** Disabling mflow autologging for benchmark runs *****") +try: + import mlflow + + mlflow.autolog(disable=True) +except ImportError: + pass + class BenchmarkBase: """Based class for benchmarking. diff --git a/python/benchmark/databricks/gpu_etl_cluster_spec.sh b/python/benchmark/databricks/gpu_etl_cluster_spec.sh index 0fac15c2..49c6fa69 100644 --- a/python/benchmark/databricks/gpu_etl_cluster_spec.sh +++ b/python/benchmark/databricks/gpu_etl_cluster_spec.sh @@ -23,7 +23,7 @@ cat <