From ff25f8fafeba01ccb547c9eda079db0149604a89 Mon Sep 17 00:00:00 2001 From: Erik Ordentlich Date: Wed, 20 Nov 2024 15:55:01 -0800 Subject: [PATCH] avoid reinitializing rmm multiple times to resolve some intermittent memory issues; pin numpy < 2 in readme Signed-off-by: Erik Ordentlich --- python/README.md | 2 +- python/src/spark_rapids_ml/core.py | 28 ++++++++++++++++------------ python/src/spark_rapids_ml/umap.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/python/README.md b/python/README.md index 31718ab0..aa9945cc 100644 --- a/python/README.md +++ b/python/README.md @@ -10,7 +10,7 @@ First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html ```bash conda create -n rapids-24.10 \ -c rapidsai -c conda-forge -c nvidia \ - cuml=24.10 cuvs=24.10 python=3.10 cuda-version=11.8 + cuml=24.10 cuvs=24.10 python=3.10 cuda-version=11.8 numpy~=1.0 ``` **Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting. Once you have a working environment, you can then try installing directly, if necessary. 
diff --git a/python/src/spark_rapids_ml/core.py b/python/src/spark_rapids_ml/core.py index 644c88a7..76d877c5 100644 --- a/python/src/spark_rapids_ml/core.py +++ b/python/src/spark_rapids_ml/core.py @@ -711,11 +711,14 @@ def _train_udf(pdf_iter: Iterator[pd.DataFrame]) -> pd.DataFrame: import rmm from rmm.allocators.cupy import rmm_cupy_allocator - rmm.reinitialize( - managed_memory=True, - devices=_CumlCommon._get_gpu_device(context, is_local), - ) - cp.cuda.set_allocator(rmm_cupy_allocator) + # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors + if not type(rmm.mr.get_current_device_resource()) == type( + rmm.mr.ManagedMemoryResource() + ): + rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource()) + + if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__: + cp.cuda.set_allocator(rmm_cupy_allocator) _CumlCommon._initialize_cuml_logging(cuml_verbose) @@ -1386,13 +1389,14 @@ def _transform_udf(pdf_iter: Iterator[pd.DataFrame]) -> pd.DataFrame: import rmm from rmm.allocators.cupy import rmm_cupy_allocator - rmm.reinitialize( - managed_memory=True, - devices=_CumlCommon._get_gpu_device( - context, is_local, is_transform=True - ), - ) - cp.cuda.set_allocator(rmm_cupy_allocator) + # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors + if not type(rmm.mr.get_current_device_resource()) == type( + rmm.mr.ManagedMemoryResource() + ): + rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource()) + + if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__: + cp.cuda.set_allocator(rmm_cupy_allocator) # Construct the cuml counterpart object cuml_instance = construct_cuml_object_func() diff --git a/python/src/spark_rapids_ml/umap.py b/python/src/spark_rapids_ml/umap.py index 2fc68498..c1a282d9 100644 --- a/python/src/spark_rapids_ml/umap.py +++ b/python/src/spark_rapids_ml/umap.py @@ -1114,8 +1114,14 @@ def _train_udf(pdf_iter: Iterable[pd.DataFrame]) 
-> Iterable[pd.DataFrame]: import rmm from rmm.allocators.cupy import rmm_cupy_allocator - rmm.reinitialize(managed_memory=True) - cp.cuda.set_allocator(rmm_cupy_allocator) + # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors + if not type(rmm.mr.get_current_device_resource()) == type( + rmm.mr.ManagedMemoryResource() + ): + rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource()) + + if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__: + cp.cuda.set_allocator(rmm_cupy_allocator) _CumlCommon._initialize_cuml_logging(cuml_verbose)