From b1a32ba95f4c4caaf238cbe77638bf747f7e13c3 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 14 Feb 2024 14:25:22 +0000 Subject: [PATCH 1/3] [EXP][Command-Buffer] Fix CUDA Coverity issues Address issues in the CUDA adapter code added by https://github.com/oneapi-src/unified-runtime/pull/1089 flagged by Coverity. * Uninitalized struct member of `CUDA_KERNEL_NODE_PARAMS` struct * copying instead of moving a shared pointer to a node when constructing a command-handle. --- source/adapters/cuda/command_buffer.cpp | 12 +++++++----- source/adapters/cuda/command_buffer.hpp | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 3f7970df53..7beef3f3f8 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -72,11 +72,12 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { ur_exp_command_buffer_command_handle_t_:: ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + std::shared_ptr &&Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr) - : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), - WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node{std::move(Node)}, + Params(Params), WorkDim(WorkDim), RefCountInternal(1), + RefCountExternal(1) { CommandBuffer->incrementInternalReferenceCount(); const size_t CopySize = sizeof(size_t) * WorkDim; @@ -375,6 +376,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( NodeParams.blockDimZ = ThreadsPerBlock[2]; NodeParams.sharedMemBytes = LocalSize; NodeParams.kernelParams = const_cast(ArgIndices.data()); + NodeParams.kern = nullptr; NodeParams.extra = nullptr; // Create and add an new kernel node to the Cuda graph @@ -392,8 +394,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ - hCommandBuffer, hKernel, NodeSP, NodeParams, - workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; + hCommandBuffer, hKernel, std::move(NodeSP), NodeParams, + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; NewCommand->incrementInternalReferenceCount(); hCommandBuffer->CommandHandles.push_back(NewCommand); diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index e2b09059bf..60e03dc655 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -183,7 +183,7 @@ static inline const char *getUrResultString(ur_result_t Result) { struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + std::shared_ptr &&Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr); From 273ac6be0065e0ca553052447f1ec0b2ba154c0e Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Fri, 16 Feb 2024 16:14:51 +0000 Subject: [PATCH 2/3] Only destroy executable graph if command-buffer finalized --- source/adapters/cuda/command_buffer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 7beef3f3f8..836854c627 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -66,7 +66,9 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { cuGraphDestroy(CudaGraph); // Release the memory allocated to the CudaGraphExec - cuGraphExecDestroy(CudaGraphExec); + if (CudaGraphExec) { + cuGraphExecDestroy(CudaGraphExec); + } } ur_exp_command_buffer_command_handle_t_:: From be622e7cf127f652599f46b23f7e9e8e939e3a82 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 28 Feb 2024 13:28:53 +0000 Subject: [PATCH 3/3] [HIP][CMD-BUF] HIP coverity fixes * Don't throw from constructor * Move rather than copy nodes during sync-point registration * Initalize `NextSyncPoint` --- source/adapters/hip/command_buffer.cpp | 6 +++--- source/adapters/hip/command_buffer.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 9fd3a06927..180c90d064 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -50,7 +50,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, - RefCountInternal{1}, RefCountExternal{1} { + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } @@ -65,11 +65,11 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { UR_TRACE(urDeviceRelease(Device)); // Release the memory allocated to the HIPGraph - UR_CHECK_ERROR(hipGraphDestroy(HIPGraph)); + (void)hipGraphDestroy(HIPGraph); // Release the memory allocated to the HIPGraphExec if (HIPGraphExec) { - UR_CHECK_ERROR(hipGraphExecDestroy(HIPGraphExec)); + (void)hipGraphExecDestroy(HIPGraphExec); } } diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index f1b3e32bfb..53b648b5cd 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -254,8 +254,8 @@ struct ur_exp_command_buffer_handle_t_ { ~ur_exp_command_buffer_handle_t_(); void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, - std::shared_ptr HIPNode) { - SyncPoints[SyncPoint] = HIPNode; + std::shared_ptr &&HIPNode) { + SyncPoints[SyncPoint] = std::move(HIPNode); NextSyncPoint++; } @@ -269,7 +269,7 @@ struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_sync_point_t addSyncPoint(std::shared_ptr HIPNode) { ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint; - registerSyncPoint(SyncPoint, HIPNode); + registerSyncPoint(SyncPoint, std::move(HIPNode)); return SyncPoint; } uint32_t incrementInternalReferenceCount() noexcept {