diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 3f7970df53..836854c627 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -66,17 +66,20 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { cuGraphDestroy(CudaGraph); // Release the memory allocated to the CudaGraphExec - cuGraphExecDestroy(CudaGraphExec); + if (CudaGraphExec) { + cuGraphExecDestroy(CudaGraphExec); + } } ur_exp_command_buffer_command_handle_t_:: ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + std::shared_ptr &&Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr) - : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), - WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node{std::move(Node)}, + Params(Params), WorkDim(WorkDim), RefCountInternal(1), + RefCountExternal(1) { CommandBuffer->incrementInternalReferenceCount(); const size_t CopySize = sizeof(size_t) * WorkDim; @@ -375,6 +378,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( NodeParams.blockDimZ = ThreadsPerBlock[2]; NodeParams.sharedMemBytes = LocalSize; NodeParams.kernelParams = const_cast(ArgIndices.data()); + NodeParams.kern = nullptr; NodeParams.extra = nullptr; // Create and add an new kernel node to the Cuda graph @@ -392,8 +396,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ - hCommandBuffer, hKernel, NodeSP, NodeParams, - workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; + hCommandBuffer, hKernel, std::move(NodeSP), NodeParams, + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; NewCommand->incrementInternalReferenceCount(); hCommandBuffer->CommandHandles.push_back(NewCommand); diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index e2b09059bf..60e03dc655 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -183,7 +183,7 @@ static inline const char *getUrResultString(ur_result_t Result) { struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + std::shared_ptr &&Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr); diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 9fd3a06927..180c90d064 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -50,7 +50,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, - RefCountInternal{1}, RefCountExternal{1} { + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } @@ -65,11 +65,11 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { UR_TRACE(urDeviceRelease(Device)); // Release the memory allocated to the HIPGraph - UR_CHECK_ERROR(hipGraphDestroy(HIPGraph)); + (void)hipGraphDestroy(HIPGraph); // Release the memory allocated to the HIPGraphExec if (HIPGraphExec) { - UR_CHECK_ERROR(hipGraphExecDestroy(HIPGraphExec)); + (void)hipGraphExecDestroy(HIPGraphExec); } } diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index f1b3e32bfb..53b648b5cd 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -254,8 +254,8 @@ struct ur_exp_command_buffer_handle_t_ { ~ur_exp_command_buffer_handle_t_(); void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, - std::shared_ptr HIPNode) { - SyncPoints[SyncPoint] = HIPNode; + std::shared_ptr &&HIPNode) { + SyncPoints[SyncPoint] = std::move(HIPNode); NextSyncPoint++; } @@ -269,7 +269,7 @@ struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_sync_point_t addSyncPoint(std::shared_ptr HIPNode) { ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint; - registerSyncPoint(SyncPoint, HIPNode); + registerSyncPoint(SyncPoint, std::move(HIPNode)); return SyncPoint; } uint32_t incrementInternalReferenceCount() noexcept {