avoid reinitializing rmm multiple times to resolve some intermittent memory issues; pin numpy < 2 in readme

Signed-off-by: Erik Ordentlich <[email protected]>
eordentlich committed Nov 20, 2024
1 parent d10e9f0 commit ff25f8f
Showing 3 changed files with 25 additions and 15 deletions.
2 changes: 1 addition & 1 deletion python/README.md
@@ -10,7 +10,7 @@ First, install RAPIDS cuML per [these instructions](https://rapids.ai/start.html)
```bash
conda create -n rapids-24.10 \
-c rapidsai -c conda-forge -c nvidia \
- cuml=24.10 cuvs=24.10 python=3.10 cuda-version=11.8
+ cuml=24.10 cuvs=24.10 python=3.10 cuda-version=11.8 numpy~=1.0
```

**Note**: while testing, we recommend using conda or docker to simplify installation and isolate your environment while experimenting. Once you have a working environment, you can then try installing directly, if necessary.
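
The added `numpy~=1.0` pins NumPy to the 1.x series (the compatible-release spec, roughly `>=1.0,<2.0`). A quick post-install sanity check, illustrative only and not part of the repo:

```python
# Hypothetical check: confirm the environment picked up NumPy 1.x, not 2.x.
import numpy

major = int(numpy.__version__.split(".")[0])
assert major == 1, f"expected numpy 1.x, got {numpy.__version__}"
print(f"numpy {numpy.__version__} OK")
```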
28 changes: 16 additions & 12 deletions python/src/spark_rapids_ml/core.py
@@ -711,11 +711,14 @@ def _train_udf(pdf_iter: Iterator[pd.DataFrame]) -> pd.DataFrame:
import rmm
from rmm.allocators.cupy import rmm_cupy_allocator

- rmm.reinitialize(
-     managed_memory=True,
-     devices=_CumlCommon._get_gpu_device(context, is_local),
- )
- cp.cuda.set_allocator(rmm_cupy_allocator)
+ # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors
+ if not type(rmm.mr.get_current_device_resource()) == type(
+     rmm.mr.ManagedMemoryResource()
+ ):
+     rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())
+
+ if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__:
+     cp.cuda.set_allocator(rmm_cupy_allocator)

_CumlCommon._initialize_cuml_logging(cuml_verbose)

@@ -1386,13 +1389,14 @@ def _transform_udf(pdf_iter: Iterator[pd.DataFrame]) -> pd.DataFrame:
import rmm
from rmm.allocators.cupy import rmm_cupy_allocator

- rmm.reinitialize(
-     managed_memory=True,
-     devices=_CumlCommon._get_gpu_device(
-         context, is_local, is_transform=True
-     ),
- )
- cp.cuda.set_allocator(rmm_cupy_allocator)
+ # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors
+ if not type(rmm.mr.get_current_device_resource()) == type(
+     rmm.mr.ManagedMemoryResource()
+ ):
+     rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())
+
+ if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__:
+     cp.cuda.set_allocator(rmm_cupy_allocator)

# Construct the cuml counterpart object
cuml_instance = construct_cuml_object_func()
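
Both hunks in this file make the same change: instead of unconditionally calling `rmm.reinitialize(...)` on every UDF invocation, the current RMM device resource and CuPy allocator are checked first and only set when needed. A minimal standalone sketch of that guarded setup, outside the Spark UDF context (the helper name is illustrative and not part of the repo; it uses `isinstance` where the commit compares exact types, and assumes a CUDA-capable host with `rmm` and `cupy` installed):

```python
import cupy as cp
import rmm
from rmm.allocators.cupy import rmm_cupy_allocator


def ensure_managed_memory() -> None:
    # Swap in a managed-memory resource only if one is not already active, so a
    # second invocation in the same worker process does not reinitialize RMM
    # and invalidate buffers handed out by the earlier resource.
    if not isinstance(
        rmm.mr.get_current_device_resource(), rmm.mr.ManagedMemoryResource
    ):
        rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())

    # Likewise, install the RMM-backed CuPy allocator only once.
    if cp.cuda.get_allocator().__name__ != rmm_cupy_allocator.__name__:
        cp.cuda.set_allocator(rmm_cupy_allocator)
```

Unlike the removed `rmm.reinitialize(..., devices=...)` call, the new code operates only on the worker's currently active device.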
10 changes: 8 additions & 2 deletions python/src/spark_rapids_ml/umap.py
@@ -1114,8 +1114,14 @@ def _train_udf(pdf_iter: Iterable[pd.DataFrame]) -> Iterable[pd.DataFrame]:
import rmm
from rmm.allocators.cupy import rmm_cupy_allocator

- rmm.reinitialize(managed_memory=True)
- cp.cuda.set_allocator(rmm_cupy_allocator)
+ # avoid initializing these twice to avoid downstream segfaults and other cuda memory errors
+ if not type(rmm.mr.get_current_device_resource()) == type(
+     rmm.mr.ManagedMemoryResource()
+ ):
+     rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())
+
+ if not cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__:
+     cp.cuda.set_allocator(rmm_cupy_allocator)

_CumlCommon._initialize_cuml_logging(cuml_verbose)

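
With both core.py and umap.py applying the same check-before-set pattern, repeated setup within one worker process is a no-op after the first call. An illustrative way to exercise that with the sketch above (again assuming a CUDA-capable environment):

```python
# The second call should leave the first resource and allocator untouched.
ensure_managed_memory()
first = rmm.mr.get_current_device_resource()

ensure_managed_memory()  # no-op: resource is already a ManagedMemoryResource
assert first is rmm.mr.get_current_device_resource()
assert cp.cuda.get_allocator().__name__ == rmm_cupy_allocator.__name__
```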