From 447e4f9e836a21f4024b204b4b608c14ec8b8dc7 Mon Sep 17 00:00:00 2001 From: Ken Raffenetti Date: Fri, 12 Jan 2024 09:23:04 -0600 Subject: [PATCH] mpl/cuda: Add thread safety to memory hooks CUDA memory allocation functions are thread-safe, so our hooks need to be, as well. Use a static MPL "initlock" to avoid the need for initialization, since the hooks are installed at link-time. --- src/mpl/src/gpu/mpl_gpu_cuda.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/mpl/src/gpu/mpl_gpu_cuda.c b/src/mpl/src/gpu/mpl_gpu_cuda.c index a73b00be79b..9255a79cf83 100644 --- a/src/mpl/src/gpu/mpl_gpu_cuda.c +++ b/src/mpl/src/gpu/mpl_gpu_cuda.c @@ -32,6 +32,7 @@ static CUresult CUDAAPI(*sys_cuMemFree) (CUdeviceptr dptr); static cudaError_t CUDARTAPI(*sys_cudaFree) (void *dptr); static int gpu_mem_hook_init(); +static MPL_initlock_t free_hook_mutex = MPL_INITLOCK_INITIALIZER; int MPL_gpu_get_dev_count(int *dev_cnt, int *dev_id, int *subdevice_id) { @@ -359,7 +360,9 @@ int MPL_gpu_init(int debug_summary) * in cuda, such as cudaFree and cuMemFree, to track user behaviors on * the memory buffer and invalidate cached handle/buffer respectively * for result correctness. */ + MPL_initlock_lock(&free_hook_mutex); gpu_mem_hook_init(); + MPL_initlock_unlock(&free_hook_mutex); gpu_initialized = 1; if (MPL_gpu_info.debug_summary) { @@ -388,11 +391,13 @@ int MPL_gpu_finalize(void) MPL_free(global_to_local_map); gpu_free_hook_s *prev; + MPL_initlock_lock(&free_hook_mutex); while (free_hook_chain) { prev = free_hook_chain; free_hook_chain = free_hook_chain->next; MPL_free(prev); } + MPL_initlock_unlock(&free_hook_mutex); /* Reset initialization state */ gpu_initialized = 0; @@ -483,6 +488,7 @@ static int gpu_mem_hook_init() assert(sys_cuMemFree); sys_cudaFree = (void *) dlsym(libcudart_handle, "cudaFree"); assert(sys_cudaFree); + return MPL_SUCCESS; } @@ -492,12 +498,15 @@ int MPL_gpu_free_hook_register(void (*free_hook) (void *dptr)) assert(hook_obj); hook_obj->free_hook = free_hook; hook_obj->next = NULL; + + MPL_initlock_lock(&free_hook_mutex); if (!free_hook_chain) free_hook_chain = hook_obj; else { hook_obj->next = free_hook_chain; free_hook_chain = hook_obj; } + MPL_initlock_unlock(&free_hook_mutex); return MPL_SUCCESS; } @@ -508,12 +517,15 @@ __attribute__ ((visibility("default"))) CUresult CUDAAPI cuMemFree(CUdeviceptr dptr) { CUresult result; + MPL_initlock_lock(&free_hook_mutex); if (!sys_cuMemFree) { gpu_mem_hook_init(); } gpu_free_hooks_cb((void *) dptr); result = sys_cuMemFree(dptr); + + MPL_initlock_unlock(&free_hook_mutex); return (result); } @@ -523,12 +535,15 @@ __attribute__ ((visibility("default"))) cudaError_t CUDARTAPI cudaFree(void *dptr) { cudaError_t result; + MPL_initlock_lock(&free_hook_mutex); if (!sys_cudaFree) { gpu_mem_hook_init(); } gpu_free_hooks_cb(dptr); result = sys_cudaFree(dptr); + + MPL_initlock_unlock(&free_hook_mutex); return result; }