From 447e4f9e836a21f4024b204b4b608c14ec8b8dc7 Mon Sep 17 00:00:00 2001
From: Ken Raffenetti <raffenet@mcs.anl.gov>
Date: Fri, 12 Jan 2024 09:23:04 -0600
Subject: [PATCH] mpl/cuda: Add thread safety to memory hooks

CUDA memory allocation functions are thread-safe, so our hooks need to
be as well. Use a statically initialized MPL "initlock" so that the
lock needs no runtime setup, since the hooks are installed at link
time.
---
 src/mpl/src/gpu/mpl_gpu_cuda.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/mpl/src/gpu/mpl_gpu_cuda.c b/src/mpl/src/gpu/mpl_gpu_cuda.c
index a73b00be79b..9255a79cf83 100644
--- a/src/mpl/src/gpu/mpl_gpu_cuda.c
+++ b/src/mpl/src/gpu/mpl_gpu_cuda.c
@@ -32,6 +32,7 @@ static CUresult CUDAAPI(*sys_cuMemFree) (CUdeviceptr dptr);
 static cudaError_t CUDARTAPI(*sys_cudaFree) (void *dptr);
 
 static int gpu_mem_hook_init();
+static MPL_initlock_t free_hook_mutex = MPL_INITLOCK_INITIALIZER;
 
 int MPL_gpu_get_dev_count(int *dev_cnt, int *dev_id, int *subdevice_id)
 {
@@ -359,7 +360,9 @@ int MPL_gpu_init(int debug_summary)
      * in cuda, such as cudaFree and cuMemFree, to track user behaviors on
      * the memory buffer and invalidate cached handle/buffer respectively
      * for result correctness. */
+    MPL_initlock_lock(&free_hook_mutex);
     gpu_mem_hook_init();
+    MPL_initlock_unlock(&free_hook_mutex);
     gpu_initialized = 1;
 
     if (MPL_gpu_info.debug_summary) {
@@ -388,11 +391,13 @@ int MPL_gpu_finalize(void)
     MPL_free(global_to_local_map);
 
     gpu_free_hook_s *prev;
+    MPL_initlock_lock(&free_hook_mutex);
     while (free_hook_chain) {
         prev = free_hook_chain;
         free_hook_chain = free_hook_chain->next;
         MPL_free(prev);
     }
+    MPL_initlock_unlock(&free_hook_mutex);
 
     /* Reset initialization state */
     gpu_initialized = 0;
@@ -483,6 +488,7 @@ static int gpu_mem_hook_init()
     assert(sys_cuMemFree);
     sys_cudaFree = (void *) dlsym(libcudart_handle, "cudaFree");
     assert(sys_cudaFree);
+
     return MPL_SUCCESS;
 }
 
@@ -492,12 +498,15 @@ int MPL_gpu_free_hook_register(void (*free_hook) (void *dptr))
     assert(hook_obj);
     hook_obj->free_hook = free_hook;
     hook_obj->next = NULL;
+
+    MPL_initlock_lock(&free_hook_mutex);
     if (!free_hook_chain)
         free_hook_chain = hook_obj;
     else {
         hook_obj->next = free_hook_chain;
         free_hook_chain = hook_obj;
     }
+    MPL_initlock_unlock(&free_hook_mutex);
 
     return MPL_SUCCESS;
 }
@@ -508,12 +517,15 @@ __attribute__ ((visibility("default")))
 CUresult CUDAAPI cuMemFree(CUdeviceptr dptr)
 {
     CUresult result;
+    MPL_initlock_lock(&free_hook_mutex);        /* guards lazy init, hook callbacks and the real free */
     if (!sys_cuMemFree) {
         gpu_mem_hook_init();
     }
 
     gpu_free_hooks_cb((void *) dptr);
     result = sys_cuMemFree(dptr);
+
+    MPL_initlock_unlock(&free_hook_mutex);      /* released only after sys_cuMemFree has returned */
     return (result);
 }
 
@@ -523,12 +535,15 @@ __attribute__ ((visibility("default")))
 cudaError_t CUDARTAPI cudaFree(void *dptr)
 {
     cudaError_t result;
+    MPL_initlock_lock(&free_hook_mutex);        /* guards lazy init, hook callbacks and the real free */
     if (!sys_cudaFree) {
         gpu_mem_hook_init();
     }
 
     gpu_free_hooks_cb(dptr);
     result = sys_cudaFree(dptr);
+
+    MPL_initlock_unlock(&free_hook_mutex);      /* released only after sys_cudaFree has returned */
     return result;
 }