From ff25f8fafeba01ccb547c9eda079db0149604a89 Mon Sep 17 00:00:00 2001 From: Erik Ordentlich Date: Wed, 20 Nov 2024 15:55:01 -0800 Subject: [PATCH] avoid reinitializing rmm multiple times to resolve some intermittent memory issues; pin numpy < 2 in readme Signed-off-by: Erik Ordentlich --- python/README.md | 2 +- python/src/spark_rapids_ml/core.py | 28 ++++++++++++++++------------ python/src/spark_rapids_ml/umap.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/python/README.md b/python/README.md index 31718ab0..aa9945cc 100644 --- a/python/README.md +++ b/python/README.md @@ -10,7 +10,7 @@ First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html ```bash conda create -n rapids-24.10 \ -c rapidsai -c conda-forge -c nvidia \ - cuml=24.10 cuvs=24.10 python=3.10 cuda-version=11.8 + cuml=24.10 cuvs=24.10 python=3.10 cuda-version=11.8 numpy~=1.0 ``` **Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting. Once you have a working environment, you can then try installing directly, if necessary. 
diff --git a/python/src/spark_rapids_ml/core.py b/python/src/spark_rapids_ml/core.py index 644c88a7..76d877c5 100644 --- a/python/src/spark_rapids_ml/core.py +++ b/python/src/spark_rapids_ml/core.py @@ -711,11 +711,14 @@ def _train_udf(pdf_iter: Iterator[pd.DataFrame]) -> pd.DataFrame: import rmm from rmm.allocators.cupy import rmm_cupy_allocator - rmm.reinitialize( - managed_memory=True, - devices=_CumlCommon._get_gpu_device(context, is_local), - ) - cp.cuda.set_allocator(rmm_cupy_allocator) + # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors + if not type(rmm.mr.get_current_device_resource()) == type( + rmm.mr.ManagedMemoryResource() + ): + rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource()) + + if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__: + cp.cuda.set_allocator(rmm_cupy_allocator) _CumlCommon._initialize_cuml_logging(cuml_verbose) @@ -1386,13 +1389,14 @@ def _transform_udf(pdf_iter: Iterator[pd.DataFrame]) -> pd.DataFrame: import rmm from rmm.allocators.cupy import rmm_cupy_allocator - rmm.reinitialize( - managed_memory=True, - devices=_CumlCommon._get_gpu_device( - context, is_local, is_transform=True - ), - ) - cp.cuda.set_allocator(rmm_cupy_allocator) + # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors + if not type(rmm.mr.get_current_device_resource()) == type( + rmm.mr.ManagedMemoryResource() + ): + rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource()) + + if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__: + cp.cuda.set_allocator(rmm_cupy_allocator) # Construct the cuml counterpart object cuml_instance = construct_cuml_object_func() diff --git a/python/src/spark_rapids_ml/umap.py b/python/src/spark_rapids_ml/umap.py index 2fc68498..c1a282d9 100644 --- a/python/src/spark_rapids_ml/umap.py +++ b/python/src/spark_rapids_ml/umap.py @@ -1114,8 +1114,14 @@ def _train_udf(pdf_iter: Iterable[pd.DataFrame]) 
-> Iterable[pd.DataFrame]: import rmm from rmm.allocators.cupy import rmm_cupy_allocator - rmm.reinitialize(managed_memory=True) - cp.cuda.set_allocator(rmm_cupy_allocator) + # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors + if not type(rmm.mr.get_current_device_resource()) == type( + rmm.mr.ManagedMemoryResource() + ): + rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource()) + + if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__: + cp.cuda.set_allocator(rmm_cupy_allocator) _CumlCommon._initialize_cuml_logging(cuml_verbose)