Skip to content

Commit

Permalink
Merge pull request #6861 from raffenet/fix-for-rc2
Browse files Browse the repository at this point in the history
[4.2.x] mpl/cuda: Fix potential crash in memory hooks
  • Loading branch information
raffenet authored Jan 16, 2024
2 parents a79d91e + 447e4f9 commit e0b7245
Showing 1 changed file with 23 additions and 0 deletions.
23 changes: 23 additions & 0 deletions src/mpl/src/gpu/mpl_gpu_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ static CUresult CUDAAPI(*sys_cuMemFree) (CUdeviceptr dptr);
/* Pointer to the underlying cudaFree implementation. gpu_mem_hook_init()
 * assigns it from dlsym(libcudart_handle, "cudaFree"); the cudaFree hook in
 * this file forwards every call through it. */
static cudaError_t CUDARTAPI(*sys_cudaFree) (void *dptr);

/* Sets up the sys_* function pointers used by the free hooks. It asserts that
 * sys_cuMemFree and sys_cudaFree are non-NULL before returning MPL_SUCCESS.
 * Called from MPL_gpu_init() and, when a pointer is still unset, from the
 * cuMemFree/cudaFree hooks themselves. */
static int gpu_mem_hook_init();
/* Lock taken around gpu_mem_hook_init() in MPL_gpu_init(), around every
 * insertion into and teardown of free_hook_chain, and for the whole body of
 * the cuMemFree/cudaFree hooks. */
static MPL_initlock_t free_hook_mutex = MPL_INITLOCK_INITIALIZER;

int MPL_gpu_get_dev_count(int *dev_cnt, int *dev_id, int *subdevice_id)
{
Expand Down Expand Up @@ -359,7 +360,9 @@ int MPL_gpu_init(int debug_summary)
* in cuda, such as cudaFree and cuMemFree, to track user behaviors on
* the memory buffer and invalidate cached handle/buffer respectively
* for result correctness. */
MPL_initlock_lock(&free_hook_mutex);
gpu_mem_hook_init();
MPL_initlock_unlock(&free_hook_mutex);
gpu_initialized = 1;

if (MPL_gpu_info.debug_summary) {
Expand Down Expand Up @@ -388,11 +391,13 @@ int MPL_gpu_finalize(void)
MPL_free(global_to_local_map);

gpu_free_hook_s *prev;
MPL_initlock_lock(&free_hook_mutex);
while (free_hook_chain) {
prev = free_hook_chain;
free_hook_chain = free_hook_chain->next;
MPL_free(prev);
}
MPL_initlock_unlock(&free_hook_mutex);

/* Reset initialization state */
gpu_initialized = 0;
Expand Down Expand Up @@ -483,6 +488,7 @@ static int gpu_mem_hook_init()
assert(sys_cuMemFree);
sys_cudaFree = (void *) dlsym(libcudart_handle, "cudaFree");
assert(sys_cudaFree);

return MPL_SUCCESS;
}

Expand All @@ -492,12 +498,15 @@ int MPL_gpu_free_hook_register(void (*free_hook) (void *dptr))
assert(hook_obj);
hook_obj->free_hook = free_hook;
hook_obj->next = NULL;

MPL_initlock_lock(&free_hook_mutex);
if (!free_hook_chain)
free_hook_chain = hook_obj;
else {
hook_obj->next = free_hook_chain;
free_hook_chain = hook_obj;
}
MPL_initlock_unlock(&free_hook_mutex);

return MPL_SUCCESS;
}
Expand All @@ -508,8 +517,15 @@ __attribute__ ((visibility("default")))
CUresult CUDAAPI cuMemFree(CUdeviceptr dptr)
{
    /*
     * Hook definition of cuMemFree: passes dptr to gpu_free_hooks_cb() and
     * then forwards the free to the underlying implementation via
     * sys_cuMemFree. Everything between lock and unlock runs with
     * free_hook_mutex held.
     *
     * Returns the value produced by the forwarded sys_cuMemFree() call.
     */
    CUresult status;

    MPL_initlock_lock(&free_hook_mutex);

    /* sys_cuMemFree is still unset if we get here before
     * gpu_mem_hook_init() has run; set it up on demand. */
    if (sys_cuMemFree == NULL)
        gpu_mem_hook_init();

    /* Hooks see the pointer before the memory is actually released. */
    gpu_free_hooks_cb((void *) dptr);
    status = sys_cuMemFree(dptr);

    MPL_initlock_unlock(&free_hook_mutex);

    return status;
}

Expand All @@ -519,8 +535,15 @@ __attribute__ ((visibility("default")))
cudaError_t CUDARTAPI cudaFree(void *dptr)
{
    /*
     * Hook definition of cudaFree: passes dptr to gpu_free_hooks_cb() and
     * then forwards the free to the underlying implementation via
     * sys_cudaFree. Everything between lock and unlock runs with
     * free_hook_mutex held.
     *
     * Returns the value produced by the forwarded sys_cudaFree() call.
     */
    cudaError_t status;

    MPL_initlock_lock(&free_hook_mutex);

    /* sys_cudaFree is still unset if we get here before
     * gpu_mem_hook_init() has run; set it up on demand. */
    if (sys_cudaFree == NULL)
        gpu_mem_hook_init();

    /* Hooks see the pointer before the memory is actually released. */
    gpu_free_hooks_cb(dptr);
    status = sys_cudaFree(dptr);

    MPL_initlock_unlock(&free_hook_mutex);

    return status;
}

Expand Down

0 comments on commit e0b7245

Please sign in to comment.