From c3944ca3d15b2fe172a306ba03dffa7bb822a8d4 Mon Sep 17 00:00:00 2001 From: Akshay Venkatesh Date: Mon, 6 Jan 2025 21:26:25 +0000 Subject: [PATCH 1/4] UCT/CUDA_IPC: Use buffer id to detect VA recycling --- src/uct/cuda/cuda_ipc/cuda_ipc_cache.c | 10 +++------- src/uct/cuda/cuda_ipc/cuda_ipc_md.c | 10 ++++++---- src/uct/cuda/cuda_ipc/cuda_ipc_md.h | 1 + 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c index 7e6e5429a2b..e5926af995c 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c @@ -482,11 +482,7 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, (key, mapped_addr), int ret; size_t cmp_size; -#if HAVE_CUDA_FABRIC - cmp_size = sizeof(key->ph.handle); -#else - cmp_size = sizeof(key->ph); -#endif + cmp_size = sizeof(key->ph.buffer_id); status = uct_cuda_ipc_get_remote_cache(key->pid, &cache); if (status != UCS_OK) { @@ -498,8 +494,8 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, (key, mapped_addr), &cache->pgtable, key->d_bptr); if (ucs_likely(pgt_region != NULL)) { region = ucs_derived_of(pgt_region, uct_cuda_ipc_cache_region_t); - if (memcmp((const void *)&key->ph, (const void *)&region->key.ph, - cmp_size) == 0) { + if (memcmp((const void *)&key->ph.buffer_id, + (const void *)&region->key.ph.buffer_id, cmp_size) == 0) { /*cache hit */ ucs_trace("%s: cuda_ipc cache hit addr:%p size:%lu region:" UCS_PGT_REGION_FMT, cache->name, (void *)key->d_bptr, diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c index fd1f588657e..10829edb4a6 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c @@ -117,7 +117,7 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh, uct_cuda_ipc_lkey_t *key; ucs_status_t status; #if HAVE_CUDA_FABRIC -#define UCT_CUDA_IPC_QUERY_NUM_ATTRS 2 +#define UCT_CUDA_IPC_QUERY_NUM_ATTRS 4 
CUmemGenericAllocationHandle handle; CUmemoryPool mempool; CUpointer_attribute attr_type[UCT_CUDA_IPC_QUERY_NUM_ATTRS]; @@ -143,6 +143,10 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh, attr_data[0] = &legacy_capable; attr_type[1] = CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES; attr_data[1] = &allowed_handle_types; + attr_type[2] = CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE; + attr_data[2] = &mempool; + attr_type[3] = CU_POINTER_ATTRIBUTE_BUFFER_ID; + attr_data[3] = &key->ph.buffer_id; status = UCT_CUDADRV_FUNC_LOG_ERR( cuPointerGetAttributes(ucs_static_array_size(attr_data), attr_type, @@ -185,9 +189,7 @@ uct_cuda_ipc_mem_add_reg(void *addr, uct_cuda_ipc_memh_t *memh, goto common_path; } - status = UCT_CUDADRV_FUNC_LOG_ERR(cuPointerGetAttribute(&mempool, - CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE, (CUdeviceptr)addr)); - if ((status != UCS_OK) || (mempool == 0)) { + if (mempool == 0) { /* cuda_ipc can only handle UCS_MEMORY_TYPE_CUDA, which has to be either * legacy type, or VMM type, or mempool type. 
Return error if memory * does not belong to any of the three types */ diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_md.h b/src/uct/cuda/cuda_ipc/cuda_ipc_md.h index eb621bd5ce8..d78611450e5 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_md.h +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_md.h @@ -31,6 +31,7 @@ typedef struct uct_cuda_ipc_md_handle { } handle; CUmemPoolPtrExportData ptr; CUmemoryPool pool; + unsigned long long buffer_id; } uct_cuda_ipc_md_handle_t; #else typedef CUipcMemHandle uct_cuda_ipc_md_handle_t; From efcbcd56c88bf5e3c0b19b5ac43ee6f00b89ceea Mon Sep 17 00:00:00 2001 From: Akshay Venkatesh Date: Mon, 6 Jan 2025 22:16:10 +0000 Subject: [PATCH 2/4] UCT/CUDA_IPC: build fix for non-fabric cases --- src/uct/cuda/cuda_ipc/cuda_ipc_cache.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c index e5926af995c..258f591f9f0 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c @@ -480,10 +480,9 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, (key, mapped_addr), ucs_pgt_region_t *pgt_region; uct_cuda_ipc_cache_region_t *region; int ret; + const void *arg1, *arg2; size_t cmp_size; - cmp_size = sizeof(key->ph.buffer_id); - status = uct_cuda_ipc_get_remote_cache(key->pid, &cache); if (status != UCS_OK) { return status; @@ -494,8 +493,16 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, (key, mapped_addr), &cache->pgtable, key->d_bptr); if (ucs_likely(pgt_region != NULL)) { region = ucs_derived_of(pgt_region, uct_cuda_ipc_cache_region_t); - if (memcmp((const void *)&key->ph.buffer_id, - (const void *)&region->key.ph.buffer_id, cmp_size) == 0) { +#if HAVE_CUDA_FABRIC + cmp_size = sizeof(key->ph.buffer_id); + arg1 = (const void *)&key->ph.buffer_id; + arg2 = (const void *)&region->key.ph.buffer_id; +#else + cmp_size = sizeof(key->ph); + arg1 = (const void *)&key->ph; + arg2 = (const void *)&region->key.ph; 
+#endif + if (memcmp(arg1, arg2, cmp_size) == 0) { /*cache hit */ ucs_trace("%s: cuda_ipc cache hit addr:%p size:%lu region:" UCS_PGT_REGION_FMT, cache->name, (void *)key->d_bptr, From 759925b9244d7245ecce84e9be894ff4aba984df Mon Sep 17 00:00:00 2001 From: Akshay Venkatesh Date: Wed, 8 Jan 2025 00:17:04 +0000 Subject: [PATCH 3/4] UCT/CUDA_IPC: free mapped ptr; defer pooldestroy to cache purge phase --- src/uct/cuda/cuda_ipc/cuda_ipc_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c index 258f591f9f0..36f80b372a7 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c @@ -124,7 +124,7 @@ static ucs_status_t uct_cuda_ipc_close_memhandle(uct_cuda_ipc_cache_region_t *re (CUdeviceptr)region->mapped_addr, region->key.b_len)); } } else if (region->key.ph.handle_type == UCT_CUDA_IPC_KEY_HANDLE_TYPE_MEMPOOL) { - return UCT_CUDADRV_FUNC_LOG_WARN(cuMemPoolDestroy(region->key.ph.pool)); + return UCT_CUDADRV_FUNC_LOG_WARN(cuMemFree((CUdeviceptr)region->mapped_addr)); } else #endif { From b9685de606fa0c6af9492be801900b5383154292 Mon Sep 17 00:00:00 2001 From: Akshay Venkatesh Date: Wed, 8 Jan 2025 00:21:56 +0000 Subject: [PATCH 4/4] UCT/CUDA_IPC: fix indentation --- src/uct/cuda/cuda_ipc/cuda_ipc_cache.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c index 36f80b372a7..7e9d5fda83b 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c @@ -492,15 +492,15 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, (key, mapped_addr), pgt_region = UCS_PROFILE_CALL(ucs_pgtable_lookup, &cache->pgtable, key->d_bptr); if (ucs_likely(pgt_region != NULL)) { - region = ucs_derived_of(pgt_region, uct_cuda_ipc_cache_region_t); + region = ucs_derived_of(pgt_region, uct_cuda_ipc_cache_region_t); 
#if HAVE_CUDA_FABRIC - cmp_size = sizeof(key->ph.buffer_id); - arg1 = (const void *)&key->ph.buffer_id; - arg2 = (const void *)&region->key.ph.buffer_id; + cmp_size = sizeof(key->ph.buffer_id); + arg1 = (const void*)&key->ph.buffer_id; + arg2 = (const void*)&region->key.ph.buffer_id; #else - cmp_size = sizeof(key->ph); - arg1 = (const void *)&key->ph; - arg2 = (const void *)&region->key.ph; + cmp_size = sizeof(key->ph); + arg1 = (const void*)&key->ph; + arg2 = (const void*)&region->key.ph; #endif if (memcmp(arg1, arg2, cmp_size) == 0) { /*cache hit */