From 5eeb6cf7e7f9c9c6377d560e7eab00c19245a62b Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 8 Feb 2024 17:36:06 +0000 Subject: [PATCH] [EXP][Command-buffer] OpenCL kernel command update Implement, for the OpenCL adapter, the API for updating the kernel commands in a command-buffer, as defined by https://github.com/oneapi-src/unified-runtime/pull/1089. However, the following changes to the UR kernel update API have been made based on implementation experience: 1. Forbid updating the work-dim of the kernel, see https://github.com/KhronosGroup/OpenCL-Docs/issues/1057 2. Remove the struct fields for updating exec info, after the [DPC++ implementation prototype](https://github.com/intel/llvm/pull/12840) showed they aren't needed. This adapter implementation depends on support for the [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) extension. Tested on Intel GPU and CPU OpenCL implementations with the [command-buffer emulation layer](https://github.com/bashbaug/SimpleOpenCLSamples/tree/main/layers/10_cmdbufemu). 
```bash $ OPENCL_LAYERS= ./bin/test-exp_command_buffer --platform="Intel(R) OpenCL Graphics" ``` DPC++ PR intel/llvm#12724 --- include/ur_api.h | 20 +- include/ur_print.h | 8 - include/ur_print.hpp | 65 ---- scripts/core/EXP-COMMAND-BUFFER.rst | 2 - scripts/core/exp-command-buffer.yml | 30 +- source/adapters/cuda/command_buffer.cpp | 7 + source/adapters/opencl/command_buffer.cpp | 326 ++++++++++++++---- source/adapters/opencl/command_buffer.hpp | 83 ++++- source/adapters/opencl/common.cpp | 32 ++ source/adapters/opencl/common.hpp | 9 + source/adapters/opencl/device.cpp | 6 +- source/loader/ur_libapi.cpp | 3 + source/loader/ur_print.cpp | 8 - source/ur_api.cpp | 3 + .../buffer_fill_kernel_update.cpp | 40 ++- .../buffer_saxpy_kernel_update.cpp | 33 +- .../conformance/exp_command_buffer/fixtures.h | 2 +- .../exp_command_buffer/invalid_update.cpp | 11 +- .../exp_command_buffer/ndrange_update.cpp | 67 ++-- .../usm_fill_kernel_update.cpp | 79 ----- .../usm_saxpy_kernel_update.cpp | 2 - 21 files changed, 501 insertions(+), 335 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 7ba79f4e13..b20fa89fcb 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -270,7 +270,6 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC = 0x1002, ///< ::ur_exp_command_buffer_update_memobj_arg_desc_t UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC = 0x1003, ///< ::ur_exp_command_buffer_update_pointer_arg_desc_t UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC = 0x1004, ///< ::ur_exp_command_buffer_update_value_arg_desc_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC = 0x1005, ///< ::ur_exp_command_buffer_update_exec_info_desc_t UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< 
::ur_exp_interop_semaphore_desc_t @@ -7896,19 +7895,6 @@ typedef struct ur_exp_command_buffer_update_value_arg_desc_t { } ur_exp_command_buffer_update_value_arg_desc_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Descriptor type for updating kernel command execution info. -typedef struct ur_exp_command_buffer_update_exec_info_desc_t { - ur_structure_type_t stype; ///< [in] type of this structure, must be - ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC - const void *pNext; ///< [in][optional] pointer to extension-specific structure - ur_kernel_exec_info_t propName; ///< [in] Name of execution attribute. - size_t propSize; ///< [in] Size of execution attribute. - const ur_kernel_exec_info_properties_t *pProperties; ///< [in][optional] Pointer to execution info properties. - const void *pNewExecInfo; ///< [in] Pointer to memory location holding the execution info value. - -} ur_exp_command_buffer_update_exec_info_desc_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Descriptor type for updating a kernel launch command. typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { @@ -7918,7 +7904,6 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { uint32_t numNewMemObjArgs; ///< [in] Length of pNewMemObjArgList. uint32_t numNewPointerArgs; ///< [in] Length of pNewPointerArgList. uint32_t numNewValueArgs; ///< [in] Length of pNewValueArgList. - uint32_t numNewExecInfos; ///< [in] Length of pNewExecInfoList. uint32_t newWorkDim; ///< [in] Number of work dimensions in the kernel ND-range, from 1-3. const ur_exp_command_buffer_update_memobj_arg_desc_t *pNewMemObjArgList; ///< [in][optional][range(0, numNewMemObjArgs)] An array describing the new ///< kernel mem obj arguments for the command. @@ -7926,8 +7911,6 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { ///< new kernel pointer arguments for the command. 
const ur_exp_command_buffer_update_value_arg_desc_t *pNewValueArgList; ///< [in][optional][range(0, numNewValueArgs)] An array describing the new ///< kernel value arguments for the command. - const ur_exp_command_buffer_update_exec_info_desc_t *pNewExecInfoList; ///< [in][optional][range(0, numNewExecInfos)] An array describing the - ///< execution info objects for the command. size_t *pNewGlobalWorkOffset; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned ///< values that describe the offset used to calculate the global ID. size_t *pNewGlobalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned @@ -7970,6 +7953,8 @@ typedef struct ur_exp_command_buffer_command_handle_t_ *ur_exp_command_buffer_co /// + `NULL == phCommandBuffer` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8571,6 +8556,7 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernelLaunch->newWorkDim` is different from the work-dim used on creation of `hCommand`. 
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/include/ur_print.h b/include/ur_print.h index b4675aee02..3912f4a6f8 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -978,14 +978,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdatePointerArgDesc( /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateValueArgDesc(const struct ur_exp_command_buffer_update_value_arg_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_command_buffer_update_exec_info_desc_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateExecInfoDesc(const struct ur_exp_command_buffer_update_exec_info_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); - /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_command_buffer_update_kernel_launch_desc_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 35f0f2e9df..ad851b9f19 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -332,7 +332,6 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_memobj_arg_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_pointer_arg_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_value_arg_desc_t params); -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const 
struct ur_exp_command_buffer_update_exec_info_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_peer_info_t value); @@ -1044,9 +1043,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_structure_type_t value case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC"; break; - case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: - os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC"; - break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES"; break; @@ -1282,11 +1278,6 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; - case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: { - const ur_exp_command_buffer_update_exec_info_desc_t *pstruct = (const ur_exp_command_buffer_update_exec_info_desc_t *)ptr; - printPtr(os, pstruct); - } break; - case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: { const ur_exp_sampler_mip_properties_t *pstruct = (const ur_exp_sampler_mip_properties_t *)ptr; printPtr(os, pstruct); @@ -9513,46 +9504,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_command_buffer_update_exec_info_desc_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_exec_info_desc_t params) { - os << "(struct ur_exp_command_buffer_update_exec_info_desc_t){"; - - os << ".stype = "; - - os << (params.stype); - - os << ", "; - os << ".pNext = "; - - ur::details::printStruct(os, - (params.pNext)); - - os << ", "; - os << ".propName = "; - - os 
<< (params.propName); - - os << ", "; - os << ".propSize = "; - - os << (params.propSize); - - os << ", "; - os << ".pProperties = "; - - os << (params.pProperties); - - os << ", "; - os << ".pNewExecInfo = "; - - os << (params.pNewExecInfo); - - os << "}"; - return os; -} -/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_command_buffer_update_kernel_launch_desc_t type /// @returns /// std::ostream & @@ -9584,11 +9535,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu os << (params.numNewValueArgs); - os << ", "; - os << ".numNewExecInfos = "; - - os << (params.numNewExecInfos); - os << ", "; os << ".newWorkDim = "; @@ -9627,17 +9573,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu } os << "}"; - os << ", "; - os << ".pNewExecInfoList = {"; - for (size_t i = 0; (params.pNewExecInfoList) != NULL && i < params.numNewExecInfos; ++i) { - if (i != 0) { - os << ", "; - } - - os << ((params.pNewExecInfoList))[i]; - } - os << "}"; - os << ", "; os << ".pNewGlobalWorkOffset = {"; for (size_t i = 0; (params.pNewGlobalWorkOffset) != NULL && i < params.newWorkDim; ++i) { diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 0143b72c77..eb4656ba05 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -256,7 +256,6 @@ Enums * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC - * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC * ${x}_command_t * ${X}_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP * ${x}_function_t @@ -290,7 +289,6 @@ Types * ${x}_exp_command_buffer_update_memobj_arg_desc_t * ${x}_exp_command_buffer_update_pointer_arg_desc_t * ${x}_exp_command_buffer_update_value_arg_desc_t -* 
${x}_exp_command_buffer_update_exec_info_desc_t * ${x}_exp_command_buffer_sync_point_t * ${x}_exp_command_buffer_handle_t * ${x}_exp_command_buffer_command_handle_t diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index d2292ceb22..998d9f3107 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -63,9 +63,6 @@ etors: - name: EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC desc: $x_exp_command_buffer_update_value_arg_desc_t value: "0x1004" - - name: EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC - desc: $x_exp_command_buffer_update_exec_info_desc_t - value: "0x1005" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -163,24 +160,6 @@ members: desc: "[in][optional] Argument value representing matching kernel arg type to set at argument index." --- #-------------------------------------------------------------------------- type: struct -desc: "Descriptor type for updating kernel command execution info." -base: $x_base_desc_t -name: $x_exp_command_buffer_update_exec_info_desc_t -members: - - type: ur_kernel_exec_info_t - name: propName - desc: "[in] Name of execution attribute." - - type: size_t - name: propSize - desc: "[in] Size of execution attribute." - - type: "const ur_kernel_exec_info_properties_t *" - name: pProperties - desc: "[in][optional] Pointer to execution info properties." - - type: "const void *" - name: pNewExecInfo - desc: "[in] Pointer to memory location holding the execution info value." ---- #-------------------------------------------------------------------------- -type: struct desc: "Descriptor type for updating a kernel launch command." base: $x_base_desc_t name: $x_exp_command_buffer_update_kernel_launch_desc_t @@ -194,9 +173,6 @@ members: - type: uint32_t name: numNewValueArgs desc: "[in] Length of pNewValueArgList." - - type: uint32_t - name: numNewExecInfos - desc: "[in] Length of pNewExecInfoList." 
- type: uint32_t name: newWorkDim desc: "[in] Number of work dimensions in the kernel ND-range, from 1-3." @@ -209,9 +185,6 @@ members: - type: "const $x_exp_command_buffer_update_value_arg_desc_t*" name: pNewValueArgList desc: "[in][optional][range(0, numNewValueArgs)] An array describing the new kernel value arguments for the command." - - type: "const $x_exp_command_buffer_update_exec_info_desc_t*" - name: pNewExecInfoList - desc: "[in][optional][range(0, numNewExecInfos)] An array describing the execution info objects for the command." - type: "size_t*" name: pNewGlobalWorkOffset desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the offset used to calculate the global ID." @@ -261,6 +234,8 @@ params: returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -924,6 +899,7 @@ returns: - $X_RESULT_ERROR_INVALID_OPERATION: - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." + - "If `pUpdateKernelLaunch->newWorkDim` is different from the work-dim used on creation of `hCommand`." 
- $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 3f7970df53..aef2f13498 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -867,6 +867,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } + // Error if work dim changes + if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { + if (NewWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + } + // Kernel corresponding to the command to update ur_kernel_handle_t Kernel = hCommand->Kernel; diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 88c661b4ae..d85e505468 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -11,9 +11,51 @@ #include "command_buffer.hpp" #include "common.hpp" +namespace { +ur_result_t +commandBufferReleaseInternal(ur_exp_command_buffer_handle_t CommandBuffer) { + if (CommandBuffer->decrementInternalReferenceCount() != 0) { + return UR_RESULT_SUCCESS; + } + + delete CommandBuffer; + return UR_RESULT_SUCCESS; +} + +ur_result_t +commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { + if (Command->decrementInternalReferenceCount() != 0) { + return UR_RESULT_SUCCESS; + } + + // Decrement parent command-buffer internal ref count + commandBufferReleaseInternal(Command->hCommandBuffer); + + delete Command; + return UR_RESULT_SUCCESS; +} +} // end anonymous namespace + +/// The ur_exp_command_buffer_handle_t_ destructor calls CL release +/// command-buffer to free the underlying object. 
+ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { + urQueueRelease(hInternalQueue); + + cl_context CLContext = cl_adapter::cast(hContext); + cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, + cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); + assert(Res == CL_SUCCESS); + (void)Res; + + clReleaseCommandBufferKHR(CLCommandBuffer); +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, - [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ur_exp_command_buffer_handle_t *phCommandBuffer) { ur_queue_handle_t Queue = nullptr; @@ -29,13 +71,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( if (!clCreateCommandBufferKHR || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; + const bool IsUpdatable = + pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; + + bool DeviceSupportsUpdate = false; + cl_device_id CLDevice = cl_adapter::cast(hDevice); + CL_RETURN_ON_FAILURE(deviceSupportsURCommandBufferKernelUpdate( + CLDevice, DeviceSupportsUpdate)); + + if (IsUpdatable && !DeviceSupportsUpdate) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + cl_command_buffer_properties_khr Properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, + IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0}; auto CLCommandBuffer = clCreateCommandBufferKHR( - 1, cl_adapter::cast(&Queue), nullptr, &Res); + 1, cl_adapter::cast(&Queue), Properties, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); try { auto URCommandBuffer = std::make_unique( - Queue, hContext, CLCommandBuffer); + Queue, hContext, CLCommandBuffer, IsUpdatable); *phCommandBuffer = URCommandBuffer.release(); } catch (...) 
{ return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -47,38 +104,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue)); - - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, - cl_ext::RetainCommandBufferName, &clRetainCommandBuffer); - - if (!clRetainCommandBuffer || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; - - CL_RETURN_ON_FAILURE(clRetainCommandBuffer(hCommandBuffer->CLCommandBuffer)); + hCommandBuffer->incrementInternalReferenceCount(); + hCommandBuffer->incrementExternalReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue)); - - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, - cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); - - if (!clReleaseCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + if (hCommandBuffer->decrementExternalReferenceCount() == 0) { + // External ref count has reached zero, internal release of created + // commands. 
+ for (auto Command : hCommandBuffer->CommandHandles) { + commandHandleReleaseInternal(Command); + } + } - CL_RETURN_ON_FAILURE( - clReleaseCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); - return UR_RESULT_SUCCESS; + return commandBufferReleaseInternal(hCommandBuffer); } UR_APIEXPORT ur_result_t UR_APICALL @@ -95,6 +136,7 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { CL_RETURN_ON_FAILURE( clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); + hCommandBuffer->IsFinalized = true; return UR_RESULT_SUCCESS; } @@ -105,7 +147,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, - ur_exp_command_buffer_command_handle_t *) { + ur_exp_command_buffer_command_handle_t *phCommandHandle) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -117,11 +159,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; + cl_mutable_command_khr CommandHandle = nullptr; + cl_mutable_command_khr *OutCommandHandle = + hCommandBuffer->IsUpdatable ? &CommandHandle : nullptr; + + cl_ndrange_kernel_command_properties_khr UpdateProperties[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR, + 0}; + + cl_ndrange_kernel_command_properties_khr *Properties = + hCommandBuffer->IsUpdatable ? 
UpdateProperties : nullptr; CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, + hCommandBuffer->CLCommandBuffer, nullptr, Properties, cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, nullptr)); + pSyncPointWaitList, pSyncPoint, OutCommandHandle)); + + try { + auto URCommandHandle = + std::make_unique( + hCommandBuffer, CommandHandle, workDim); + *phCommandHandle = URCommandHandle.release(); + hCommandBuffer->CommandHandles.push_back(*phCommandHandle); + } catch (...) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } return UR_RESULT_SUCCESS; } @@ -359,65 +425,189 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_command_handle_t hCommand) { + hCommand->incrementExternalReferenceCount(); + hCommand->incrementInternalReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_command_handle_t hCommand) { + hCommand->decrementExternalReferenceCount(); + return commandHandleReleaseInternal(hCommand); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, - [[maybe_unused]] const ur_exp_command_buffer_update_kernel_launch_desc_t +namespace { +void updateKernelPointerArgs( + std::vector &CLUSMArgs, + const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + // WARNING - This relies on USM and SVM using the same implementation, + // which is not 
guaranteed. + // See https://github.com/KhronosGroup/OpenCL-Docs/issues/843 + const uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = + pUpdateKernelLaunch->pNewPointerArgList; + + CLUSMArgs.resize(NumPointerArgs); + for (uint32_t i = 0; i < NumPointerArgs; i++) { + const ur_exp_command_buffer_update_pointer_arg_desc_t &URPointerArg = + ArgPointerList[i]; + cl_mutable_dispatch_arg_khr &USMArg = CLUSMArgs[i]; + USMArg.arg_index = URPointerArg.argIndex; + USMArg.arg_value = *(void *const *)URPointerArg.pNewPointerArg; + } } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +void updateKernelArgs(std::vector &CLArgs, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + const uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = + pUpdateKernelLaunch->pNewMemObjArgList; + const uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = + pUpdateKernelLaunch->pNewValueArgList; + + for (uint32_t i = 0; i < NumMemobjArgs; i++) { + const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg = + ArgMemobjList[i]; + cl_mutable_dispatch_arg_khr CLArg{ + URMemObjArg.argIndex, // arg_index + sizeof(cl_mem), // arg_size + cl_adapter::cast( + &URMemObjArg.hNewMemObjArg) // arg_value + }; + + CLArgs.push_back(CLArg); + } + + for (uint32_t i = 0; i < NumValueArgs; i++) { + const ur_exp_command_buffer_update_value_arg_desc_t &URValueArg = + ArgValueList[i]; + cl_mutable_dispatch_arg_khr CLArg{ + URValueArg.argIndex, // arg_index + URValueArg.argSize, // arg_size + URValueArg.pNewValueArg // arg_value + }; + CLArgs.push_back(CLArg); + } +} +} 
// end anonymous namespace + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + + ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; + cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetCommandBufferInfoKHRCache, - cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR); + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clUpdateMutableCommandsKHRCache, + cl_ext::UpdateMutableCommandsName, &clUpdateMutableCommandsKHR); + + if (!clUpdateMutableCommandsKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; - if (!clGetCommandBufferInfoKHR || Res != CL_SUCCESS) + if (!hCommandBuffer->IsFinalized || !hCommandBuffer->IsUpdatable) return UR_RESULT_ERROR_INVALID_OPERATION; - if (propName != UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT) { - return UR_RESULT_ERROR_INVALID_ENUMERATION; + const cl_uint NewWorkDim = pUpdateKernelLaunch->newWorkDim; + if (NewWorkDim != 0 && NewWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; } - if (pPropSizeRet) { - *pPropSizeRet = sizeof(cl_uint); + // Find the CL USM pointer arguments to the kernel to update + std::vector CLUSMArgs; + updateKernelPointerArgs(CLUSMArgs, pUpdateKernelLaunch); + + // Find the memory object and scalar arguments to the kernel to update + std::vector CLArgs; + + updateKernelArgs(CLArgs, pUpdateKernelLaunch); + + // Find the updated ND-Range configuration of the kernel. 
+ std::vector CLGlobalWorkOffset, CLGlobalWorkSize, CLLocalWorkSize; + cl_uint &CommandWorkDim = hCommand->WorkDim; + + // Lambda for N-Dimensional update + auto updateNDRange = [CommandWorkDim](std::vector &NDRange, + size_t *UpdatePtr) { + NDRange.resize(CommandWorkDim, 0); + const size_t CopySize = sizeof(size_t) * CommandWorkDim; + std::memcpy(NDRange.data(), UpdatePtr, CopySize); + }; + + if (auto GlobalWorkOffsetPtr = pUpdateKernelLaunch->pNewGlobalWorkOffset) { + updateNDRange(CLGlobalWorkOffset, GlobalWorkOffsetPtr); } - cl_uint ref_count; - CL_RETURN_ON_FAILURE(clGetCommandBufferInfoKHR( - hCommandBuffer->CLCommandBuffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR, - sizeof(ref_count), &ref_count, nullptr)); + if (auto GlobalWorkSizePtr = pUpdateKernelLaunch->pNewGlobalWorkSize) { + updateNDRange(CLGlobalWorkSize, GlobalWorkSizePtr); + } - if (pPropValue) { - if (propSize != sizeof(cl_uint)) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - static_assert(sizeof(cl_uint) == sizeof(uint32_t)); - *static_cast(pPropValue) = static_cast(ref_count); + if (auto LocalWorkSizePtr = pUpdateKernelLaunch->pNewLocalWorkSize) { + updateNDRange(CLLocalWorkSize, LocalWorkSizePtr); } + cl_mutable_command_khr command = + cl_adapter::cast(hCommand->CLMutableCommand); + cl_mutable_dispatch_config_khr dispatch_config = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + static_cast(CLArgs.size()), // num_args + static_cast(CLUSMArgs.size()), // num_svm_args + 0, // num_exec_infos + CommandWorkDim, // work_dim + CLArgs.data(), // arg_list + CLUSMArgs.data(), // arg_svm_list + nullptr, // exec_info_list + CLGlobalWorkOffset.data(), // global_work_offset + CLGlobalWorkSize.data(), // global_work_size + CLLocalWorkSize.data(), // local_work_size + }; + cl_mutable_base_config_khr config = { + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, &dispatch_config}; + CL_RETURN_ON_FAILURE( + clUpdateMutableCommandsKHR(hCommandBuffer->CLCommandBuffer, &config)); + 
return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + return ReturnValue(hCommandBuffer->getExternalReferenceCount()); + default: + assert(!"Command-buffer info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, - [[maybe_unused]] ur_exp_command_buffer_command_info_t propName, - [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue, - [[maybe_unused]] size_t *pPropSizeRet) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: + return ReturnValue(hCommand->getExternalReferenceCount()); + default: + assert(!"Command-buffer command info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index d80f29594b..8f3bdbb55b 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -11,14 +11,93 @@ #include #include +/// Handle to a kernel command. +struct ur_exp_command_buffer_command_handle_t_ { + /// Command-buffer this command belongs to. + ur_exp_command_buffer_handle_t hCommandBuffer; + /// OpenCL command-handle. 
+ cl_mutable_command_khr CLMutableCommand; + /// Work-dimension the command was originally created with. + cl_uint WorkDim; + /// Internal & External reference counts. + /// We need to maintain these because in OpenCL a command-handle isn't + /// reference counted, but is tied to the lifetime of the parent + /// command-buffer. This is not the case in UR where a command-handle is + /// reference counted. + std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; + + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t hCommandBuffer, + cl_mutable_command_khr CLMutableCommand, cl_uint WorkDim) + : hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand), + WorkDim(WorkDim), RefCountInternal(0), RefCountExternal(0) {} + + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } +}; + +/// Handle to a command-buffer object. struct ur_exp_command_buffer_handle_t_ { + /// UR queue belonging to the command-buffer, required for OpenCL creation. ur_queue_handle_t hInternalQueue; + /// Context the command-buffer is created for. ur_context_handle_t hContext; + /// OpenCL command-buffer object. cl_command_buffer_khr CLCommandBuffer; + /// Set to true if the kernel commands in the command-buffer can be updated, + /// false otherwise + bool IsUpdatable; + /// Set to true if the command-buffer has been finalized, false otherwise + bool IsFinalized; + /// List of commands in the command-buffer. + std::vector CommandHandles; + /// Internal & External reference counts of the command-buffer.
We do this + /// manually rather than forward to the OpenCL retain/release APIs because + /// we also need to track the lifetimes of command handle objects, which + /// extend the lifetime of a UR command-buffer even if its reference + /// count is zero. + std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue, ur_context_handle_t hContext, - cl_command_buffer_khr CLCommandBuffer) + cl_command_buffer_khr CLCommandBuffer, + bool IsUpdatable) : hInternalQueue(hQueue), hContext(hContext), - CLCommandBuffer(CLCommandBuffer) {} + CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable), + IsFinalized(false), RefCountInternal(0), RefCountExternal(0) {} + + ~ur_exp_command_buffer_handle_t_(); + + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } }; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 4fe8bed408..267c4fc705 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -101,3 +101,35 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { *NativeHandle = reinterpret_cast(URObj); return UR_RESULT_SUCCESS; } + +cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, + bool &Result) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + std::string SupportedExtensions(ExtStr.c_str()); + if
(ExtStr.find("cl_khr_command_buffer_mutable_dispatch") == + std::string::npos) { + Result = false; + return CL_SUCCESS; + } + + // All the CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR capabilities must + // be supported by a device for UR update. + cl_mutable_dispatch_fields_khr mutable_capabilities; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + Dev, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr)); + const cl_mutable_dispatch_fields_khr required_caps = + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_EXEC_INFO_KHR; + Result = (mutable_capabilities & required_caps) == required_caps; + return CL_SUCCESS; +} diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0667cd3d17..bdb94c3feb 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -215,6 +215,7 @@ CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; +CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR"; #undef CONSTFIX @@ -305,6 +306,10 @@ using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); +using clUpdateMutableCommandsKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr *mutable_config); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; @@ -344,6 +349,7 @@ struct ExtFuncPtrCacheT { FuncPtrCache clCommandFillBufferKHRCache; FuncPtrCache 
clEnqueueCommandBufferKHRCache; FuncPtrCache clGetCommandBufferInfoKHRCache; + FuncPtrCache clUpdateMutableCommandsKHRCache; }; // A raw pointer is used here since the lifetime of this map has to be tied to // piTeardown to avoid issues with static destruction order (a user application @@ -414,3 +420,6 @@ static ur_result_t getExtFuncFromContext(cl_context Context, ur_result_t mapCLErrorToUR(cl_int Result); ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle); + +cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, + bool &Result); diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 115b9b2e09..7792839d29 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -967,7 +967,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, std::string::npos); } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - return ReturnValue(false); + cl_device_id Dev = cl_adapter::cast(hDevice); + bool Supported = false; + CL_RETURN_ON_FAILURE( + deviceSupportsURCommandBufferKernelUpdate(Dev, Supported)); + return ReturnValue(Supported); } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 7cbbdffb1c..abfa2d8500 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7076,6 +7076,8 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// + `NULL == phCommandBuffer` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( @@ -7960,6 +7962,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernelLaunch->newWorkDim` is different from the work-dim used on creation of `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 1e8ad88086..3496ccf4bf 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -987,14 +987,6 @@ ur_result_t urPrintExpCommandBufferUpdateValueArgDesc( return str_copy(&ss, buffer, buff_size, out_size); } -ur_result_t urPrintExpCommandBufferUpdateExecInfoDesc( - const struct ur_exp_command_buffer_update_exec_info_desc_t params, - char *buffer, const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << params; - return str_copy(&ss, buffer, buff_size, out_size); -} - ur_result_t urPrintExpCommandBufferUpdateKernelLaunchDesc( const struct ur_exp_command_buffer_update_kernel_launch_desc_t params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 665e75548b..0cbf1af308 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6010,6 +6010,8 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// + `NULL == phCommandBuffer` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support 
UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( @@ -6729,6 +6731,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernelLaunch->newWorkDim` is different from the work-dim used on creation of `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp index e7fac99800..c682cc4f93 100644 --- a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -14,10 +14,28 @@ struct BufferFillCommandTest UUR_RETURN_ON_FATAL_FAILURE( urUpdatableCommandBufferExpExecutionTest::SetUp()); - // First argument is buffer to fill (will also be hidden accessor arg) - AddBuffer1DArg(sizeof(val) * global_size, &buffer); + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + sizeof(val) * global_size, nullptr, + &buffer)); + + if (backend != UR_PLATFORM_BACKEND_OPENCL) { + // First argument is buffer to fill + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffer)); + } else { + // First argument is buffer to fill + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 0, sizeof(buffer), + nullptr, &buffer)); + } + // second arg is hidden accessor + struct { + size_t offsets[1] = {0}; + } accessor; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr, + &accessor)); + // Second argument is scalar to fill with. 
- AddPodArg(val); + ASSERT_SUCCESS( + urKernelSetArgValue(kernel, 2, sizeof(val), nullptr, &val)); // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( @@ -99,12 +117,10 @@ TEST_P(BufferFillCommandTest, UpdateParameters) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -152,12 +168,10 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset &new_global_size, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -204,12 +218,10 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -234,12 +246,10 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -253,12 +263,10 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // 
numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset &new_global_size, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -299,12 +307,10 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &first_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -328,12 +334,10 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &second_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -386,12 +390,10 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 2, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList input_descs, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp index b29ad8c6c5..2fa28fb435 100644 --- a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -31,9 +31,16 @@ struct BufferSaxpyKernelTest // Variable that is incremented as arguments are added to the kernel size_t current_arg_index = 0; + // Index 0 is output buffer 
for HIP/Non-HIP - ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index++, - nullptr, buffers[0])); + if (backend != UR_PLATFORM_BACKEND_OPENCL) { + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index++, + nullptr, buffers[0])); + } else { + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index++, + sizeof(ur_mem_handle_t), nullptr, + &buffers[0])); + } // Lambda to add accessor arguments depending on backend. // HIP has 3 offset parameters and other backends only have 1. @@ -67,15 +74,27 @@ struct BufferSaxpyKernelTest sizeof(A), nullptr, &A)); // Index 5 on HIP and 3 on non-HIP is X buffer - ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index++, - nullptr, buffers[1])); + if (backend != UR_PLATFORM_BACKEND_OPENCL) { + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index++, + nullptr, buffers[1])); + } else { + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index++, + sizeof(ur_mem_handle_t), nullptr, + &buffers[1])); + } // Index 8 on HIP and 4 on non-HIP is X buffer accessor addAccessorArgs(); // Index 9 on HIP and 5 on non-HIP is Y buffer - ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index++, - nullptr, buffers[2])); + if (backend != UR_PLATFORM_BACKEND_OPENCL) { + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index++, + nullptr, buffers[2])); + } else { + ASSERT_SUCCESS(urKernelSetArgValue(kernel, current_arg_index++, + sizeof(ur_mem_handle_t), nullptr, + &buffers[2])); + } // Index 12 on HIP and 6 on non-HIP is Y buffer accessor addAccessorArgs(); @@ -186,12 +205,10 @@ TEST_P(BufferSaxpyKernelTest, UpdateParameters) { 2, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim new_input_descs, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_A_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize diff --git 
a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index c8a198224b..cbf441db2f 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -100,7 +100,7 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; ur_bool_t updatable_command_buffer_support = false; - ur_platform_backend_t backend; + ur_platform_backend_t backend{}; }; struct urUpdatableCommandBufferExpExecutionTest diff --git a/test/conformance/exp_command_buffer/invalid_update.cpp b/test/conformance/exp_command_buffer/invalid_update.cpp index 00cf04ea85..8f06fdc801 100644 --- a/test/conformance/exp_command_buffer/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/invalid_update.cpp @@ -41,6 +41,13 @@ struct InvalidUpdateTest } void TearDown() override { + // Work around an issue with the OpenCL adapter implementing urUSMFree + // using a blocking free, which hangs + if (updatable_cmd_buf_handle) { + EXPECT_SUCCESS( + urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + if (shared_ptr) { EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); } @@ -84,12 +91,10 @@ TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -135,12 +140,10 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize
nullptr, // pNewLocalWorkSize diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp index e5631f9176..a2dd440ff2 100644 --- a/test/conformance/exp_command_buffer/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -130,12 +130,10 @@ TEST_P(NDRangeUpdateTest, Update3D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos 3, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList new_global_offset.data(), // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize new_local_size.data(), // pNewLocalWorkSize @@ -152,8 +150,8 @@ TEST_P(NDRangeUpdateTest, Update3D) { Validate(global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to 2, and update global size, local size, -// and global offset to new values. +// Update the kernel work dimensions to use 1 in the Z dimension, +// and update global size, local size, and global offset to new values. TEST_P(NDRangeUpdateTest, Update2D) { // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, @@ -176,12 +174,10 @@ TEST_P(NDRangeUpdateTest, Update2D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos - 2, // newWorkDim + 3, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList new_global_offset.data(), // pNewGlobalWorkOffset new_global_size.data(), // pNewGlobalWorkSize new_local_size.data(), // pNewLocalWorkSize @@ -202,8 +198,9 @@ TEST_P(NDRangeUpdateTest, Update2D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to 1, and check that previously -// set global size, local size, and global offset update accordingly. 
+// Update the kernel work dimensions to be 1 in Y & Z dimensions, and check +// that the previously set global size, local size, and global offset update +// accordingly. TEST_P(NDRangeUpdateTest, Update1D) { // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, @@ -212,21 +209,22 @@ TEST_P(NDRangeUpdateTest, Update1D) { Validate(global_size, local_size, global_offset); // Set dimensions to 1 + std::array new_global_size = {9, 1, 1}; + std::array new_local_size = {3, 1, 1}; + std::array new_global_offset = {0, 0, 0}; ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - 0, // numNewExecInfos - 1, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 3, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + new_global_offset.data(), // pNewGlobalWorkOffset + new_global_size.data(), // pNewGlobalWorkSize + new_local_size.data(), // pNewLocalWorkSize }; // Reset output to remove old values which will no longer have a @@ -241,8 +239,29 @@ TEST_P(NDRangeUpdateTest, Update1D) { ASSERT_SUCCESS(urQueueFinish(queue)); // Verify that update occurred correctly - std::array new_global_size = {global_size[0], 1, 1}; - std::array new_local_size = {local_size[0], 1, 1}; - std::array new_global_offset = {global_offset[0], 0, 0}; Validate(new_global_size, new_local_size, new_global_offset); } + +// Test error code is returned if work dimension parameter changes +TEST_P(NDRangeUpdateTest, Invalid) { + const size_t 
new_work_dim = n_dimensions - 1; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + new_work_dim, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update command to command-buffer to use different work dim + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +} diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp index 7e6cab6ee3..576dc54519 100644 --- a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp @@ -121,12 +121,10 @@ TEST_P(USMFillCommandTest, UpdateParameters) { 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList &new_output_desc, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset &new_global_size, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -143,81 +141,6 @@ TEST_P(USMFillCommandTest, UpdateParameters) { Validate((uint32_t *)new_shared_ptr, new_global_size, new_val); } -// Test updating the kernel execution info -TEST_P(USMFillCommandTest, UpdateExecInfo) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - Validate((uint32_t *)shared_ptr, global_size, val); - - ur_exp_command_buffer_update_exec_info_desc_t 
new_exec_info_descs[3]; - - // Update direct access flag - bool indirect_access = false; - new_exec_info_descs[0] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype - nullptr, // pNext - UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, // propName - sizeof(indirect_access), // propSize - nullptr, // pProperties - &indirect_access, // pPropValue - }; - - // Update cache config - ur_kernel_cache_config_t cache_config = UR_KERNEL_CACHE_CONFIG_DEFAULT; - new_exec_info_descs[1] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype - nullptr, // pNext - UR_KERNEL_EXEC_INFO_CACHE_CONFIG, // propName - sizeof(cache_config), // propSize - nullptr, // pProperties - &cache_config, // pPropValue - }; - - // Create a new USM allocation to set indirect access for - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &new_shared_ptr)); - ASSERT_NE(new_shared_ptr, nullptr); - void *pointers = {new_shared_ptr}; - new_exec_info_descs[2] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype - nullptr, // pNext - UR_KERNEL_EXEC_INFO_USM_PTRS, // propName - sizeof(pointers), // propSize - nullptr, // pProperties - &pointers, // pPropValue - }; - - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - 3, // numNewExecInfos - 0, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - new_exec_info_descs, // pNewExecInfoList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer again - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, 
nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify results are correct, although exec info modifications should - // have no effect on output - Validate((uint32_t *)shared_ptr, global_size, val); -} - // Test updating a command-buffer with multiple USM fill kernel commands struct USMMultipleFillCommandTest : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { @@ -349,12 +272,10 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList &new_output_desc, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp index b3f9f93fe1..8f213e8b24 100644 --- a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp @@ -138,12 +138,10 @@ TEST_P(USMSaxpyKernelTest, UpdateParameters) { 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList new_input_descs, // pNewPointerArgList &new_A_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize