From 1397916f6558f34eab10ea968f5dacb54dac53fa Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 8 Feb 2024 17:36:06 +0000 Subject: [PATCH] [EXP][Command-buffer] OpenCL kernel command update Implement the API for updating the kernel commands in a command-buffer defined by https://github.com/oneapi-src/unified-runtime/pull/1089 for the OpenCL adapter. However, the following changes to the UR kernel update API have been made based on implementation experience: 1. Forbid updating the work-dim of the kernel, see https://github.com/KhronosGroup/OpenCL-Docs/issues/1057 2. Remove struct fields to update exec info, after [DPC++ implementation prototype](https://github.com/intel/llvm/pull/12840) shows this isn't needed. 3. Forbid changing the local work size from user to impl defined and vice-versa. See discussion in [L0 implementation PR](https://github.com/oneapi-src/unified-runtime/pull/1353#discussion_r1510859516). This adapter implementation depends on support for the [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) extension. Tested on Intel GPU/CPUs OpenCL implementations with the [command-buffer emulation layer](https://github.com/bashbaug/SimpleOpenCLSamples/tree/main/layers/10_cmdbufemu). ```bash $ OPENCL_LAYERS= ./bin/test-exp_command_buffer --platform="Intel(R) OpenCL Graphics" ``` DPC++ PR intel/llvm#12724 --- include/ur_api.h | 32 +- include/ur_print.h | 8 - include/ur_print.hpp | 65 ---- scripts/core/EXP-COMMAND-BUFFER.rst | 2 - scripts/core/exp-command-buffer.yml | 37 +- source/adapters/cuda/command_buffer.cpp | 27 +- source/adapters/cuda/command_buffer.hpp | 5 + source/adapters/hip/command_buffer.cpp | 27 +- source/adapters/hip/command_buffer.hpp | 5 + source/adapters/level_zero/command_buffer.cpp | 82 ++--- source/adapters/level_zero/command_buffer.hpp | 7 +- source/adapters/null/ur_nullddi.cpp | 4 +- source/adapters/opencl/command_buffer.cpp | 340 ++++++++++++++---- source/adapters/opencl/command_buffer.hpp | 87 ++++- source/adapters/opencl/common.cpp | 32 ++ source/adapters/opencl/common.hpp | 9 + source/adapters/opencl/device.cpp | 6 +- source/loader/layers/tracing/ur_trcddi.cpp | 4 +- source/loader/layers/validation/ur_valddi.cpp | 8 +- source/loader/ur_ldrddi.cpp | 4 +- source/loader/ur_libapi.cpp | 11 +- source/loader/ur_print.cpp | 8 - source/ur_api.cpp | 11 +- .../buffer_fill_kernel_update.cpp | 38 +- .../buffer_saxpy_kernel_update.cpp | 2 - .../conformance/exp_command_buffer/fixtures.h | 2 +- .../exp_command_buffer/invalid_update.cpp | 109 +++++- .../exp_command_buffer/ndrange_update.cpp | 72 ++-- .../usm_fill_kernel_update.cpp | 79 ---- .../usm_saxpy_kernel_update.cpp | 2 - 30 files changed, 713 insertions(+), 412 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index d4ea8d3404..7a2d76597e 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -271,7 +271,6 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC = 0x1002, ///< ::ur_exp_command_buffer_update_memobj_arg_desc_t UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC = 0x1003, ///< ::ur_exp_command_buffer_update_pointer_arg_desc_t UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC = 0x1004, ///< ::ur_exp_command_buffer_update_value_arg_desc_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC = 0x1005, ///< ::ur_exp_command_buffer_update_exec_info_desc_t UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t @@ -8022,19 +8021,6 @@ typedef struct ur_exp_command_buffer_update_value_arg_desc_t { } ur_exp_command_buffer_update_value_arg_desc_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Descriptor type for updating kernel command execution info. -typedef struct ur_exp_command_buffer_update_exec_info_desc_t { - ur_structure_type_t stype; ///< [in] type of this structure, must be - ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC - const void *pNext; ///< [in][optional] pointer to extension-specific structure - ur_kernel_exec_info_t propName; ///< [in] Name of execution attribute. - size_t propSize; ///< [in] Size of execution attribute. - const ur_kernel_exec_info_properties_t *pProperties; ///< [in][optional] Pointer to execution info properties. - const void *pNewExecInfo; ///< [in] Pointer to memory location holding the execution info value. - -} ur_exp_command_buffer_update_exec_info_desc_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Descriptor type for updating a kernel launch command. typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { @@ -8044,7 +8030,6 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { uint32_t numNewMemObjArgs; ///< [in] Length of pNewMemObjArgList. uint32_t numNewPointerArgs; ///< [in] Length of pNewPointerArgList. uint32_t numNewValueArgs; ///< [in] Length of pNewValueArgList. - uint32_t numNewExecInfos; ///< [in] Length of pNewExecInfoList. uint32_t newWorkDim; ///< [in] Number of work dimensions in the kernel ND-range, from 1-3. const ur_exp_command_buffer_update_memobj_arg_desc_t *pNewMemObjArgList; ///< [in][optional][range(0, numNewMemObjArgs)] An array describing the new ///< kernel mem obj arguments for the command. @@ -8052,16 +8037,16 @@ typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { ///< new kernel pointer arguments for the command. const ur_exp_command_buffer_update_value_arg_desc_t *pNewValueArgList; ///< [in][optional][range(0, numNewValueArgs)] An array describing the new ///< kernel value arguments for the command. - const ur_exp_command_buffer_update_exec_info_desc_t *pNewExecInfoList; ///< [in][optional][range(0, numNewExecInfos)] An array describing the - ///< execution info objects for the command. size_t *pNewGlobalWorkOffset; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned ///< values that describe the offset used to calculate the global ID. size_t *pNewGlobalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned ///< values that describe the number of global work-items. size_t *pNewLocalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned ///< values that describe the number of work-items that make up a - ///< work-group. If nullptr, the runtime implementation will choose the - ///< work-group size. + ///< work-group. If newWorkDim is non-zero and pNewLocalWorkSize is + ///< nullptr, then runtime implementation will choose the work-group size. + ///< If newWorkDim is zero and pNewLocalWorkSize is nullptr, then the local + ///< work size is unchanged. } ur_exp_command_buffer_update_kernel_launch_desc_t; @@ -8096,6 +8081,8 @@ typedef struct ur_exp_command_buffer_command_handle_t_ *ur_exp_command_buffer_co /// + `NULL == phCommandBuffer` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8176,7 +8163,6 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pGlobalWorkOffset` /// + `NULL == pGlobalWorkSize` -/// + `NULL == pLocalWorkSize` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP /// - ::UR_RESULT_ERROR_INVALID_KERNEL /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION @@ -8195,7 +8181,7 @@ urCommandBufferAppendKernelLaunchExp( uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. - const size_t *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. + const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. @@ -8697,6 +8683,10 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/include/ur_print.h b/include/ur_print.h index ecb85cb519..3126c1714b 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -994,14 +994,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdatePointerArgDesc( /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateValueArgDesc(const struct ur_exp_command_buffer_update_value_arg_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_command_buffer_update_exec_info_desc_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateExecInfoDesc(const struct ur_exp_command_buffer_update_exec_info_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); - /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_command_buffer_update_kernel_launch_desc_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index d770cf2a45..63eeb17a0e 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -334,7 +334,6 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_memobj_arg_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_pointer_arg_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_value_arg_desc_t params); -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_exec_info_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_peer_info_t value); @@ -1049,9 +1048,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_structure_type_t value case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC"; break; - case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: - os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC"; - break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES"; break; @@ -1290,11 +1286,6 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; - case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: { - const ur_exp_command_buffer_update_exec_info_desc_t *pstruct = (const ur_exp_command_buffer_update_exec_info_desc_t *)ptr; - printPtr(os, pstruct); - } break; - case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: { const ur_exp_sampler_mip_properties_t *pstruct = (const ur_exp_sampler_mip_properties_t *)ptr; printPtr(os, pstruct); @@ -9620,46 +9611,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_command_buffer_update_exec_info_desc_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_exec_info_desc_t params) { - os << "(struct ur_exp_command_buffer_update_exec_info_desc_t){"; - - os << ".stype = "; - - os << (params.stype); - - os << ", "; - os << ".pNext = "; - - ur::details::printStruct(os, - (params.pNext)); - - os << ", "; - os << ".propName = "; - - os << (params.propName); - - os << ", "; - os << ".propSize = "; - - os << (params.propSize); - - os << ", "; - os << ".pProperties = "; - - os << (params.pProperties); - - os << ", "; - os << ".pNewExecInfo = "; - - os << (params.pNewExecInfo); - - os << "}"; - return os; -} -/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_command_buffer_update_kernel_launch_desc_t type /// @returns /// std::ostream & @@ -9691,11 +9642,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu os << (params.numNewValueArgs); - os << ", "; - os << ".numNewExecInfos = "; - - os << (params.numNewExecInfos); - os << ", "; os << ".newWorkDim = "; @@ -9734,17 +9680,6 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu } os << "}"; - os << ", "; - os << ".pNewExecInfoList = {"; - for (size_t i = 0; (params.pNewExecInfoList) != NULL && i < params.numNewExecInfos; ++i) { - if (i != 0) { - os << ", "; - } - - os << ((params.pNewExecInfoList))[i]; - } - os << "}"; - os << ", "; os << ".pNewGlobalWorkOffset = {"; for (size_t i = 0; (params.pNewGlobalWorkOffset) != NULL && i < params.newWorkDim; ++i) { diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 0143b72c77..eb4656ba05 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -256,7 +256,6 @@ Enums * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC - * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC * ${x}_command_t * ${X}_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP * ${x}_function_t @@ -290,7 +289,6 @@ Types * ${x}_exp_command_buffer_update_memobj_arg_desc_t * ${x}_exp_command_buffer_update_pointer_arg_desc_t * ${x}_exp_command_buffer_update_value_arg_desc_t -* ${x}_exp_command_buffer_update_exec_info_desc_t * ${x}_exp_command_buffer_sync_point_t * ${x}_exp_command_buffer_handle_t * ${x}_exp_command_buffer_command_handle_t diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index d2292ceb22..6eb02229ed 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -63,9 +63,6 @@ etors: - name: EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC desc: $x_exp_command_buffer_update_value_arg_desc_t value: "0x1004" - - name: EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC - desc: $x_exp_command_buffer_update_exec_info_desc_t - value: "0x1005" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -163,24 +160,6 @@ members: desc: "[in][optional] Argument value representing matching kernel arg type to set at argument index." --- #-------------------------------------------------------------------------- type: struct -desc: "Descriptor type for updating kernel command execution info." -base: $x_base_desc_t -name: $x_exp_command_buffer_update_exec_info_desc_t -members: - - type: ur_kernel_exec_info_t - name: propName - desc: "[in] Name of execution attribute." - - type: size_t - name: propSize - desc: "[in] Size of execution attribute." - - type: "const ur_kernel_exec_info_properties_t *" - name: pProperties - desc: "[in][optional] Pointer to execution info properties." - - type: "const void *" - name: pNewExecInfo - desc: "[in] Pointer to memory location holding the execution info value." ---- #-------------------------------------------------------------------------- -type: struct desc: "Descriptor type for updating a kernel launch command." base: $x_base_desc_t name: $x_exp_command_buffer_update_kernel_launch_desc_t @@ -194,9 +173,6 @@ members: - type: uint32_t name: numNewValueArgs desc: "[in] Length of pNewValueArgList." - - type: uint32_t - name: numNewExecInfos - desc: "[in] Length of pNewExecInfoList." - type: uint32_t name: newWorkDim desc: "[in] Number of work dimensions in the kernel ND-range, from 1-3." @@ -209,9 +185,6 @@ members: - type: "const $x_exp_command_buffer_update_value_arg_desc_t*" name: pNewValueArgList desc: "[in][optional][range(0, numNewValueArgs)] An array describing the new kernel value arguments for the command." - - type: "const $x_exp_command_buffer_update_exec_info_desc_t*" - name: pNewExecInfoList - desc: "[in][optional][range(0, numNewExecInfos)] An array describing the execution info objects for the command." - type: "size_t*" name: pNewGlobalWorkOffset desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the offset used to calculate the global ID." @@ -220,7 +193,7 @@ members: desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of global work-items." - type: "size_t*" name: pNewLocalWorkSize - desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If nullptr, the runtime implementation will choose the work-group size." + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If newWorkDim is non-zero and pNewLocalWorkSize is nullptr, then runtime implementation will choose the work-group size. If newWorkDim is zero and pNewLocalWorkSize is nullptr, then the local work size is unchanged." --- #-------------------------------------------------------------------------- type: typedef desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer." @@ -261,6 +234,8 @@ params: returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -325,7 +300,7 @@ params: desc: "[in] Global work size to use when executing kernel." - type: "const size_t*" name: pLocalWorkSize - desc: "[in] Local work size to use when executing kernel." + desc: "[in][optional] Local work size to use when executing kernel." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -924,6 +899,10 @@ returns: - $X_RESULT_ERROR_INVALID_OPERATION: - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." + - "If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`." + - "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL." + - "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size." + - "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index d9d980073a..8f1ede3010 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -914,6 +914,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } + if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { + // Error if work dim changes + if (NewWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error If Local size and not global size + if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && + (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error if local size non-nullptr and created with null + // or if local size nullptr and created with non-null + const bool IsNewLocalSizeNull = + pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; + const bool IsOriginalLocalSizeNull = hCommand->isNullLocalSize(); + + if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + } + // Kernel corresponding to the command to update ur_kernel_handle_t Kernel = hCommand->Kernel; @@ -1001,11 +1024,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; size_t *GlobalWorkSize = hCommand->GlobalWorkSize; - const bool ProvidedLocalSize = hCommand->LocalWorkSize[0] != 0 || - hCommand->LocalWorkSize[1] != 0 || - hCommand->LocalWorkSize[2] != 0; // If no worksize is provided make sure we pass nullptr to setKernelParams so // it can guess the local work size. + const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; uint32_t WorkDim = hCommand->WorkDim; diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 92f3433162..84a9e0405b 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -215,6 +215,11 @@ struct ur_exp_command_buffer_command_handle_t_ { } } + bool isNullLocalSize() const noexcept { + const size_t Zeros[3] = {0, 0, 0}; + return 0 == std::memcmp(LocalWorkSize, Zeros, sizeof(LocalWorkSize)); + } + uint32_t incrementInternalReferenceCount() noexcept { return ++RefCountInternal; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 180c90d064..38bb110154 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -942,6 +942,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } + if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { + // Error if work dim changes + if (NewWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error If Local size and not global size + if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && + (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error if local size non-nullptr and created with null + // or if local size nullptr and created with non-null + const bool IsNewLocalSizeNull = + pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; + const bool IsOriginalLocalSizeNull = hCommand->isNullLocalSize(); + + if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + } + // Kernel corresponding to the command to update ur_kernel_handle_t Kernel = hCommand->Kernel; ur_device_handle_t Device = CommandBuffer->Device; @@ -1030,11 +1053,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; size_t *GlobalWorkSize = hCommand->GlobalWorkSize; - const bool ProvidedLocalSize = hCommand->LocalWorkSize[0] != 0 || - hCommand->LocalWorkSize[1] != 0 || - hCommand->LocalWorkSize[2] != 0; // If no worksize is provided make sure we pass nullptr to setKernelParams so // it can guess the local work size. + const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; uint32_t WorkDim = hCommand->WorkDim; diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index 53b648b5cd..50fddc5448 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -214,6 +214,11 @@ struct ur_exp_command_buffer_command_handle_t_ { } } + bool isNullLocalSize() const noexcept { + const size_t Zeros[3] = {0, 0, 0}; + return 0 == std::memcmp(LocalWorkSize, Zeros, sizeof(LocalWorkSize)); + } + uint32_t incrementInternalReferenceCount() noexcept { return ++RefCountInternal; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 959c3f4bdb..46e2e33607 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -81,8 +81,10 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { ur_exp_command_buffer_command_handle_t_:: ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, uint64_t CommandId, + uint32_t WorkDim, bool UserDefinedLocalSize, ur_kernel_handle_t Kernel = nullptr) - : CommandBuffer(CommandBuffer), CommandId(CommandId), Kernel(Kernel) { + : CommandBuffer(CommandBuffer), CommandId(CommandId), WorkDim(WorkDim), + UserDefinedLocalSize(UserDefinedLocalSize), Kernel(Kernel) { urCommandBufferRetainExp(CommandBuffer); if (Kernel) urKernelRetain(Kernel); @@ -591,8 +593,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( } try { if (Command) - *Command = new ur_exp_command_buffer_command_handle_t_(CommandBuffer, - CommandId, Kernel); + *Command = new ur_exp_command_buffer_command_handle_t_( + CommandBuffer, CommandId, WorkDim, LocalWorkSize != nullptr, Kernel); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -1039,8 +1041,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( UR_ASSERT(Command->CommandBuffer->IsFinalized, UR_RESULT_ERROR_INVALID_OPERATION); - auto CommandBuffer = Command->CommandBuffer; uint32_t Dim = CommandDesc->newWorkDim; + if (Dim != 0) { + // Error if work dim changes + if (Dim != Command->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error If Local size and not global size + if ((CommandDesc->pNewLocalWorkSize != nullptr) && + (CommandDesc->pNewGlobalWorkSize == nullptr)) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error if local size non-nullptr and created with null + // or if local size nullptr and created with non-null + const bool IsNewLocalSizeNull = CommandDesc->pNewLocalWorkSize == nullptr; + const bool IsOriginalLocalSizeNull = !Command->UserDefinedLocalSize; + + if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + } + + auto CommandBuffer = Command->CommandBuffer; const void *NextDesc = nullptr; auto SupportedFeatures = Command->CommandBuffer->Device->ZeDeviceMutableCmdListsProperties @@ -1231,56 +1255,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( ArgDescs.push_back(std::move(ZeMutableArgDesc)); } - // Check if there are new exec info flags provided. - for (uint32_t NewExecInfoNum = CommandDesc->numNewExecInfos; - NewExecInfoNum-- > 0;) { - ur_exp_command_buffer_update_exec_info_desc_t NewExecInfoDesc = - CommandDesc->pNewExecInfoList[NewExecInfoNum]; - ur_kernel_exec_info_t PropName = NewExecInfoDesc.propName; - const void *PropValue = NewExecInfoDesc.pNewExecInfo; - if (PropName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS) { - // The whole point for users really was to not need to know anything - // about the types of allocations kernel uses. So in DPC++ we always - // just set all 3 modes for each kernel. - if (*(static_cast(PropValue)) == true) { - ze_kernel_indirect_access_flags_t IndirectFlags = - ZE_KERNEL_INDIRECT_ACCESS_FLAG_HOST | - ZE_KERNEL_INDIRECT_ACCESS_FLAG_DEVICE | - ZE_KERNEL_INDIRECT_ACCESS_FLAG_SHARED; - ZE2UR_CALL(zeKernelSetIndirectAccess, - (Command->Kernel->ZeKernel, IndirectFlags)); - } - } else if (PropName == UR_KERNEL_EXEC_INFO_CACHE_CONFIG) { - ze_cache_config_flag_t ZeCacheConfig{}; - auto CacheConfig = - *(static_cast(PropValue)); - switch (CacheConfig) { - case UR_KERNEL_CACHE_CONFIG_LARGE_SLM: - ZeCacheConfig = ZE_CACHE_CONFIG_FLAG_LARGE_SLM; - break; - case UR_KERNEL_CACHE_CONFIG_LARGE_DATA: - ZeCacheConfig = ZE_CACHE_CONFIG_FLAG_LARGE_DATA; - break; - case UR_KERNEL_CACHE_CONFIG_DEFAULT: - ZeCacheConfig = static_cast(0); - break; - default: - // Unexpected cache configuration value. - return UR_RESULT_ERROR_INVALID_VALUE; - } - ZE2UR_CALL(zeKernelSetCacheConfig, - (Command->Kernel->ZeKernel, ZeCacheConfig);); - } else if (PropName == UR_KERNEL_EXEC_INFO_USM_PTRS) { - // Ignore this property as such kernel property is not supported by Level - // Zero. - continue; - } else { - logger::error("urCommandBufferUpdateKernelLaunchExp: unsupported name of " - "execution attribute."); - return UR_RESULT_ERROR_INVALID_VALUE; - } - } - ZeStruct MutableCommandDesc; MutableCommandDesc.pNext = NextDesc; MutableCommandDesc.flags = 0; diff --git a/source/adapters/level_zero/command_buffer.hpp b/source/adapters/level_zero/command_buffer.hpp index 67f4afd54c..f9a9288712 100644 --- a/source/adapters/level_zero/command_buffer.hpp +++ b/source/adapters/level_zero/command_buffer.hpp @@ -86,7 +86,8 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { struct ur_exp_command_buffer_command_handle_t_ : public _ur_object { ur_exp_command_buffer_command_handle_t_(ur_exp_command_buffer_handle_t, - uint64_t, ur_kernel_handle_t); + uint64_t, uint32_t, bool, + ur_kernel_handle_t); ~ur_exp_command_buffer_command_handle_t_(); @@ -94,5 +95,9 @@ struct ur_exp_command_buffer_command_handle_t_ : public _ur_object { ur_exp_command_buffer_handle_t CommandBuffer; uint64_t CommandId; + // Work-dimension the command was originally created with. + uint32_t WorkDim; + // Set to true if the user set the local work size on command creation. + bool UserDefinedLocalSize; ur_kernel_handle_t Kernel; }; diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index a85547d2d1..9a0f8aec58 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -4806,8 +4806,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. - const size_t - *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. + const size_t * + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index ac5650b1a1..a409b46d6c 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -11,9 +11,51 @@ #include "command_buffer.hpp" #include "common.hpp" +namespace { +ur_result_t +commandBufferReleaseInternal(ur_exp_command_buffer_handle_t CommandBuffer) { + if (CommandBuffer->decrementInternalReferenceCount() != 0) { + return UR_RESULT_SUCCESS; + } + + delete CommandBuffer; + return UR_RESULT_SUCCESS; +} + +ur_result_t +commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { + if (Command->decrementInternalReferenceCount() != 0) { + return UR_RESULT_SUCCESS; + } + + // Decrement parent command-buffer internal ref count + commandBufferReleaseInternal(Command->hCommandBuffer); + + delete Command; + return UR_RESULT_SUCCESS; +} +} // end anonymous namespace + +/// The ur_exp_command_buffer_handle_t_ destructor calls CL release +/// command-buffer to free the underlying object. +ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { + urQueueRelease(hInternalQueue); + + cl_context CLContext = cl_adapter::cast(hContext); + cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, + cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); + assert(Res == CL_SUCCESS); + (void)Res; + + clReleaseCommandBufferKHR(CLCommandBuffer); +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, - [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ur_exp_command_buffer_handle_t *phCommandBuffer) { ur_queue_handle_t Queue = nullptr; @@ -26,14 +68,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache, cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR)); + const bool IsUpdatable = + pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; + + bool DeviceSupportsUpdate = false; + cl_device_id CLDevice = cl_adapter::cast(hDevice); + CL_RETURN_ON_FAILURE(deviceSupportsURCommandBufferKernelUpdate( + CLDevice, DeviceSupportsUpdate)); + + if (IsUpdatable && !DeviceSupportsUpdate) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + cl_command_buffer_properties_khr Properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, + IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0}; + cl_int Res = CL_SUCCESS; auto CLCommandBuffer = clCreateCommandBufferKHR( - 1, cl_adapter::cast(&Queue), nullptr, &Res); + 1, cl_adapter::cast(&Queue), Properties, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); try { auto URCommandBuffer = std::make_unique( - Queue, hContext, CLCommandBuffer); + Queue, hContext, CLCommandBuffer, IsUpdatable); *phCommandBuffer = URCommandBuffer.release(); } catch (...) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -45,33 +103,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue)); - - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr; - UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, - cl_ext::RetainCommandBufferName, &clRetainCommandBuffer)); - - CL_RETURN_ON_FAILURE(clRetainCommandBuffer(hCommandBuffer->CLCommandBuffer)); + hCommandBuffer->incrementInternalReferenceCount(); + hCommandBuffer->incrementExternalReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue)); - - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; - UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, - cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR)); + if (hCommandBuffer->decrementExternalReferenceCount() == 0) { + // External ref count has reached zero, internal release of created + // commands. + for (auto Command : hCommandBuffer->CommandHandles) { + commandHandleReleaseInternal(Command); + } + } - CL_RETURN_ON_FAILURE( - clReleaseCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); - return UR_RESULT_SUCCESS; + return commandBufferReleaseInternal(hCommandBuffer); } UR_APIEXPORT ur_result_t UR_APICALL @@ -85,6 +132,7 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { CL_RETURN_ON_FAILURE( clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); + hCommandBuffer->IsFinalized = true; return UR_RESULT_SUCCESS; } @@ -95,7 +143,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, - ur_exp_command_buffer_command_handle_t *) { + ur_exp_command_buffer_command_handle_t *phCommandHandle) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -104,11 +152,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache, cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR)); + cl_mutable_command_khr CommandHandle = nullptr; + cl_mutable_command_khr *OutCommandHandle = + hCommandBuffer->IsUpdatable ? &CommandHandle : nullptr; + + cl_ndrange_kernel_command_properties_khr UpdateProperties[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR, + 0}; + + cl_ndrange_kernel_command_properties_khr *Properties = + hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr; CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, + hCommandBuffer->CLCommandBuffer, nullptr, Properties, cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, nullptr)); + pSyncPointWaitList, pSyncPoint, OutCommandHandle)); + + try { + auto URCommandHandle = + std::make_unique( + hCommandBuffer, CommandHandle, workDim, pLocalWorkSize != nullptr); + *phCommandHandle = URCommandHandle.release(); + hCommandBuffer->CommandHandles.push_back(*phCommandHandle); + } catch (...) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } return UR_RESULT_SUCCESS; } @@ -336,62 +408,204 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_command_handle_t hCommand) { + hCommand->incrementExternalReferenceCount(); + hCommand->incrementInternalReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_command_handle_t hCommand) { + hCommand->decrementExternalReferenceCount(); + return commandHandleReleaseInternal(hCommand); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, - [[maybe_unused]] const ur_exp_command_buffer_update_kernel_launch_desc_t +namespace { +void updateKernelPointerArgs( + std::vector &CLUSMArgs, + const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + // WARNING - This relies on USM and SVM using the same implementation, + // which is not guaranteed. + // See https://github.com/KhronosGroup/OpenCL-Docs/issues/843 + const uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = + pUpdateKernelLaunch->pNewPointerArgList; + + CLUSMArgs.resize(NumPointerArgs); + for (uint32_t i = 0; i < NumPointerArgs; i++) { + const ur_exp_command_buffer_update_pointer_arg_desc_t &URPointerArg = + ArgPointerList[i]; + cl_mutable_dispatch_arg_khr &USMArg = CLUSMArgs[i]; + USMArg.arg_index = URPointerArg.argIndex; + USMArg.arg_value = *(void *const *)URPointerArg.pNewPointerArg; + } } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +void updateKernelArgs(std::vector &CLArgs, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + const uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = + pUpdateKernelLaunch->pNewMemObjArgList; + const uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = + pUpdateKernelLaunch->pNewValueArgList; + + for (uint32_t i = 0; i < NumMemobjArgs; i++) { + const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg = + ArgMemobjList[i]; + cl_mutable_dispatch_arg_khr CLArg{ + URMemObjArg.argIndex, // arg_index + sizeof(cl_mem), // arg_size + cl_adapter::cast( + &URMemObjArg.hNewMemObjArg) // arg_value + }; + + CLArgs.push_back(CLArg); + } + + for (uint32_t i = 0; i < NumValueArgs; i++) { + const ur_exp_command_buffer_update_value_arg_desc_t &URValueArg = + ArgValueList[i]; + cl_mutable_dispatch_arg_khr CLArg{ + URValueArg.argIndex, // arg_index + URValueArg.argSize, // arg_size + URValueArg.pNewValueArg // arg_value + }; + CLArgs.push_back(CLArg); + } +} +} // end anonymous namespace + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + + ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; - UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetCommandBufferInfoKHRCache, - cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR)); - if (propName != UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT) { - return UR_RESULT_ERROR_INVALID_ENUMERATION; + cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clUpdateMutableCommandsKHRCache, + cl_ext::UpdateMutableCommandsName, &clUpdateMutableCommandsKHR)); + + if (!hCommandBuffer->IsFinalized || !hCommandBuffer->IsUpdatable) + return UR_RESULT_ERROR_INVALID_OPERATION; + + if (cl_uint NewWorkDim = pUpdateKernelLaunch->newWorkDim) { + // Error if work dim changes + if (NewWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error If Local size and not global size + if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && + (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error if local size non-nullptr and created with null + // or if local size nullptr and created with non-null + const bool IsNewLocalSizeNull = + pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; + const bool IsOriginalLocalSizeNull = !hCommand->UserDefinedLocalSize; + + if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } } - if (pPropSizeRet) { - *pPropSizeRet = sizeof(cl_uint); + // Find the CL USM pointer arguments to the kernel to update + std::vector CLUSMArgs; + updateKernelPointerArgs(CLUSMArgs, pUpdateKernelLaunch); + + // Find the memory object and scalar arguments to the kernel to update + std::vector CLArgs; + + updateKernelArgs(CLArgs, pUpdateKernelLaunch); + + // Find the updated ND-Range configuration of the kernel. + std::vector CLGlobalWorkOffset, CLGlobalWorkSize, CLLocalWorkSize; + cl_uint &CommandWorkDim = hCommand->WorkDim; + + // Lambda for N-Dimensional update + auto updateNDRange = [CommandWorkDim](std::vector &NDRange, + size_t *UpdatePtr) { + NDRange.resize(CommandWorkDim, 0); + const size_t CopySize = sizeof(size_t) * CommandWorkDim; + std::memcpy(NDRange.data(), UpdatePtr, CopySize); + }; + + if (auto GlobalWorkOffsetPtr = pUpdateKernelLaunch->pNewGlobalWorkOffset) { + updateNDRange(CLGlobalWorkOffset, GlobalWorkOffsetPtr); } - cl_uint ref_count; - CL_RETURN_ON_FAILURE(clGetCommandBufferInfoKHR( - hCommandBuffer->CLCommandBuffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR, - sizeof(ref_count), &ref_count, nullptr)); + if (auto GlobalWorkSizePtr = pUpdateKernelLaunch->pNewGlobalWorkSize) { + updateNDRange(CLGlobalWorkSize, GlobalWorkSizePtr); + } - if (pPropValue) { - if (propSize != sizeof(cl_uint)) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - static_assert(sizeof(cl_uint) == sizeof(uint32_t)); - *static_cast(pPropValue) = static_cast(ref_count); + if (auto LocalWorkSizePtr = pUpdateKernelLaunch->pNewLocalWorkSize) { + updateNDRange(CLLocalWorkSize, LocalWorkSizePtr); } + cl_mutable_command_khr command = + cl_adapter::cast(hCommand->CLMutableCommand); + cl_mutable_dispatch_config_khr dispatch_config = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + static_cast(CLArgs.size()), // num_args + static_cast(CLUSMArgs.size()), // num_svm_args + 0, // num_exec_infos + CommandWorkDim, // work_dim + CLArgs.data(), // arg_list + CLUSMArgs.data(), // arg_svm_list + nullptr, // exec_info_list + CLGlobalWorkOffset.data(), // global_work_offset + CLGlobalWorkSize.data(), // global_work_size + CLLocalWorkSize.data(), // local_work_size + }; + cl_mutable_base_config_khr config = { + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, &dispatch_config}; + CL_RETURN_ON_FAILURE( + clUpdateMutableCommandsKHR(hCommandBuffer->CLCommandBuffer, &config)); + return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + return ReturnValue(hCommandBuffer->getExternalReferenceCount()); + default: + assert(!"Command-buffer info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, - [[maybe_unused]] ur_exp_command_buffer_command_info_t propName, - [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue, - [[maybe_unused]] size_t *pPropSizeRet) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: + return ReturnValue(hCommand->getExternalReferenceCount()); + default: + assert(!"Command-buffer command info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index d80f29594b..4c39b1ad74 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -11,14 +11,97 @@ #include #include +/// Handle to a kernel command. +struct ur_exp_command_buffer_command_handle_t_ { + /// Command-buffer this command belongs to. + ur_exp_command_buffer_handle_t hCommandBuffer; + /// OpenCL command-handle. + cl_mutable_command_khr CLMutableCommand; + /// Work-dimension the command was originally created with. + cl_uint WorkDim; + /// Set to true if the user set the local work size on command creation. + bool UserDefinedLocalSize; + /// Internal & External reference counts. + /// We need to maintain these because in OpenCL a command-handle isn't + /// reference counting, but is tied to the lifetime of the parent + /// command-buffer. This is not the case in UR where a command-handle is + /// reference counted. + std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; + + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t hCommandBuffer, + cl_mutable_command_khr CLMutableCommand, cl_uint WorkDim, + bool UserDefinedLocalSize) + : hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand), + WorkDim(WorkDim), UserDefinedLocalSize(UserDefinedLocalSize), + RefCountInternal(0), RefCountExternal(0) {} + + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } +}; + +/// Handle to a command-buffer object. struct ur_exp_command_buffer_handle_t_ { + /// UR queue belonging to the command-buffer, required for OpenCL creation. ur_queue_handle_t hInternalQueue; + /// Context the command-buffer is created for. ur_context_handle_t hContext; + /// OpenCL command-buffer object. cl_command_buffer_khr CLCommandBuffer; + /// Set to true if the kernel commands in the command-buffer can be updated, + /// false otherwise + bool IsUpdatable; + /// Set to true if the command-buffer has been finalized, false otherwise + bool IsFinalized; + /// List of commands in the command-buffer. + std::vector CommandHandles; + /// Internal & External reference counts of the command-buffer. We do this + /// manually rather than forward to the OpenCL retain/release APIs because + /// we also need to track the lifetimes of command handle objects, which + /// extended the lifetime of a UR command-buffer even if its reference + /// count is zero. + std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue, ur_context_handle_t hContext, - cl_command_buffer_khr CLCommandBuffer) + cl_command_buffer_khr CLCommandBuffer, + bool IsUpdatable) : hInternalQueue(hQueue), hContext(hContext), - CLCommandBuffer(CLCommandBuffer) {} + CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable), + IsFinalized(false), RefCountInternal(0), RefCountExternal(0) {} + + ~ur_exp_command_buffer_handle_t_(); + + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } }; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index c1668494da..63981187e7 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -105,3 +105,35 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { *NativeHandle = reinterpret_cast(URObj); return UR_RESULT_SUCCESS; } + +cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, + bool &Result) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + std::string SupportedExtensions(ExtStr.c_str()); + if (ExtStr.find("cl_khr_command_buffer_mutable_dispatch") == + std::string::npos) { + Result = false; + return CL_SUCCESS; + } + + // All the CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR capabilities must + // be supported by a device for UR update. + cl_mutable_dispatch_fields_khr mutable_capabilities; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + Dev, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr)); + const cl_mutable_dispatch_fields_khr required_caps = + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_EXEC_INFO_KHR; + Result = (mutable_capabilities & required_caps) == required_caps; + return CL_SUCCESS; +} diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 256fce0c22..399f668077 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -217,6 +217,7 @@ CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; +CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR"; #undef CONSTFIX @@ -311,6 +312,10 @@ using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); +using clUpdateMutableCommandsKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr *mutable_config); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; @@ -352,6 +357,7 @@ struct ExtFuncPtrCacheT { FuncPtrCache clCommandFillBufferKHRCache; FuncPtrCache clEnqueueCommandBufferKHRCache; FuncPtrCache clGetCommandBufferInfoKHRCache; + FuncPtrCache clUpdateMutableCommandsKHRCache; }; // A raw pointer is used here since the lifetime of this map has to be tied to // piTeardown to avoid issues with static destruction order (a user application @@ -422,3 +428,6 @@ static ur_result_t getExtFuncFromContext(cl_context Context, ur_result_t mapCLErrorToUR(cl_int Result); ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle); + +cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, + bool &Result); diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 52671e6b01..d89a9492a5 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1000,7 +1000,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, std::string::npos); } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - return ReturnValue(false); + cl_device_id Dev = cl_adapter::cast(hDevice); + bool Supported = false; + CL_RETURN_ON_FAILURE( + deviceSupportsURCommandBufferKernelUpdate(Dev, Supported)); + return ReturnValue(Supported); } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 78c9a223e7..f097238036 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5205,8 +5205,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. - const size_t - *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. + const size_t * + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index c3b009a724..8483b6a879 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -7817,8 +7817,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. - const size_t - *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. + const size_t * + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -7852,10 +7852,6 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (NULL == pLocalWorkSize) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } - if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index dac56ddd1d..9e61ca7227 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -6706,8 +6706,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. - const size_t - *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. + const size_t * + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 01cc285752..7f5736663f 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7181,6 +7181,8 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// + `NULL == phCommandBuffer` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( @@ -7302,7 +7304,6 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pGlobalWorkOffset` /// + `NULL == pGlobalWorkSize` -/// + `NULL == pLocalWorkSize` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP /// - ::UR_RESULT_ERROR_INVALID_KERNEL /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION @@ -7323,8 +7324,8 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. - const size_t - *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. + const size_t * + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -8065,6 +8066,10 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 9c3b6f1277..5721ebf3ef 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1003,14 +1003,6 @@ ur_result_t urPrintExpCommandBufferUpdateValueArgDesc( return str_copy(&ss, buffer, buff_size, out_size); } -ur_result_t urPrintExpCommandBufferUpdateExecInfoDesc( - const struct ur_exp_command_buffer_update_exec_info_desc_t params, - char *buffer, const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << params; - return str_copy(&ss, buffer, buff_size, out_size); -} - ur_result_t urPrintExpCommandBufferUpdateKernelLaunchDesc( const struct ur_exp_command_buffer_update_kernel_launch_desc_t params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index e3b1ba0481..b3b7a6bf92 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6106,6 +6106,8 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// + `NULL == phCommandBuffer` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_DEVICE +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If `pCommandBufferDesc->isUpdatable` is true and `hDevice` does not support UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( @@ -6199,7 +6201,6 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pGlobalWorkOffset` /// + `NULL == pGlobalWorkSize` -/// + `NULL == pLocalWorkSize` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP /// - ::UR_RESULT_ERROR_INVALID_KERNEL /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION @@ -6220,8 +6221,8 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. - const size_t - *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. + const size_t * + pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -6825,6 +6826,10 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim used on creation of `hCommand`. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. +/// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp index 3928e76cf1..2663e6824a 100644 --- a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -14,10 +14,23 @@ struct BufferFillCommandTest UUR_RETURN_ON_FATAL_FAILURE( urUpdatableCommandBufferExpExecutionTest::SetUp()); - // First argument is buffer to fill (will also be hidden accessor arg) - AddBuffer1DArg(sizeof(val) * global_size, &buffer); + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + sizeof(val) * global_size, nullptr, + &buffer)); + + // First argument is buffer to fill + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffer)); + + // second arg is hidden accessor + struct { + size_t offsets[1] = {0}; + } accessor; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr, + &accessor)); + // Second argument is scalar to fill with. - AddPodArg(val); + ASSERT_SUCCESS( + urKernelSetArgValue(kernel, 2, sizeof(val), nullptr, &val)); // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( @@ -99,12 +112,10 @@ TEST_P(BufferFillCommandTest, UpdateParameters) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -150,21 +161,20 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) { new_buffer, // hArgValue }; + auto new_local_size = local_size; ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos 1, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset &new_global_size, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize + &new_local_size, // pNewLocalWorkSize }; ASSERT_SUCCESS( @@ -209,12 +219,10 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim &new_output_desc, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -239,12 +247,10 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -260,12 +266,10 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset &new_global_size, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -307,12 +311,10 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &first_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -336,12 +338,10 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &second_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -394,12 +394,10 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 2, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList input_descs, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp index b29ad8c6c5..55e6773cb7 100644 --- a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -186,12 +186,10 @@ TEST_P(BufferSaxpyKernelTest, UpdateParameters) { 2, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim new_input_descs, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_A_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 2f9656c0f9..7e5367aa9c 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -100,7 +100,7 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; ur_bool_t updatable_command_buffer_support = false; - ur_platform_backend_t backend; + ur_platform_backend_t backend{}; }; struct urUpdatableCommandBufferExpExecutionTest diff --git a/test/conformance/exp_command_buffer/invalid_update.cpp b/test/conformance/exp_command_buffer/invalid_update.cpp index 00cf04ea85..5ec1e382bf 100644 --- a/test/conformance/exp_command_buffer/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/invalid_update.cpp @@ -41,6 +41,13 @@ struct InvalidUpdateTest } void TearDown() override { + // Workaround an issue with the OpenCL adapter implementing urUsmFree + // using a blocking free where hangs + if (updatable_cmd_buf_handle && !finalized) { + EXPECT_SUCCESS( + urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + if (shared_ptr) { EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); } @@ -61,6 +68,7 @@ struct InvalidUpdateTest static constexpr size_t allocation_size = sizeof(val) * global_size; void *shared_ptr = nullptr; ur_exp_command_buffer_command_handle_t command_handle = nullptr; + bool finalized = false; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(InvalidUpdateTest); @@ -84,12 +92,10 @@ TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -117,6 +123,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { EXPECT_NE(test_command_handle, nullptr); EXPECT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle)); + finalized = true; // Set new value to use for fill at kernel index 1 uint32_t new_val = 33; @@ -135,12 +142,10 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize @@ -159,3 +164,99 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { EXPECT_SUCCESS(urCommandBufferReleaseExp(test_cmd_buf_handle)); } } + +// Test setting `pNewLocalWorkSize` to a non-NULL value and `pNewGlobalWorkSize` +// to NULL gives the correct error. +TEST_P(InvalidUpdateTest, GlobalLocalSizeMistach) { + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + finalized = true; + + size_t new_local_size = 16; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + &new_local_size, // pNewLocalWorkSize + }; + + // Update command local size but not global size + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +} + +// Test setting `pNewLocalWorkSize` to a non-NULL value when the command was +// created with a NULL local work size gives the correct error. +TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { + // Append kernel command to command-buffer using NULL local work size + ur_exp_command_buffer_command_handle_t second_command_handle = nullptr; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, nullptr, 0, nullptr, nullptr, &second_command_handle)); + ASSERT_NE(second_command_handle, nullptr); + + EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + finalized = true; + + size_t new_global_size = 64; + size_t new_local_size = 16; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + &new_local_size, // pNewLocalWorkSize + }; + + // Update command local size to non-NULL when created with NULL value + ur_result_t result = urCommandBufferUpdateKernelLaunchExp( + second_command_handle, &update_desc); + EXPECT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); + + if (second_command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(second_command_handle)); + } +} + +// Test setting `pNewLocalWorkSize` to a NULL value when the command was +// created with a non-NULL local work size gives the correct error. +TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + finalized = true; + + size_t new_global_size = 64; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update command local size to NULL when created with non-NULL value + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +} diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp index 217bd87f2a..486837df85 100644 --- a/test/conformance/exp_command_buffer/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -128,20 +128,19 @@ TEST_P(NDRangeUpdateTest, Update3D) { // Set local size and global offset to update to std::array new_local_size = {4, 2, 2}; std::array new_global_offset = {3, 2, 1}; + std::array new_global_size = global_size; ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos 3, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList new_global_offset.data(), // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize + new_global_size.data(), // pNewGlobalWorkSize new_local_size.data(), // pNewLocalWorkSize }; @@ -153,11 +152,11 @@ TEST_P(NDRangeUpdateTest, Update3D) { ASSERT_SUCCESS(urQueueFinish(queue)); // Verify that update occurred correctly - Validate(global_size, new_local_size, new_global_offset); + Validate(new_global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to 2, and update global size, local size, -// and global offset to new values. +// Update the kernel work dimensions to use 1 in the Z dimension, +// and update global size, local size, and global offset to new values. TEST_P(NDRangeUpdateTest, Update2D) { // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, @@ -180,12 +179,10 @@ TEST_P(NDRangeUpdateTest, Update2D) { 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs - 0, // numNewExecInfos - 2, // newWorkDim + 3, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList new_global_offset.data(), // pNewGlobalWorkOffset new_global_size.data(), // pNewGlobalWorkSize new_local_size.data(), // pNewLocalWorkSize @@ -206,8 +203,9 @@ TEST_P(NDRangeUpdateTest, Update2D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to 1, and check that previously -// set global size, local size, and global offset update accordingly. +// Update the kernel work dimensions to be 1 in Y & Z dimensions, and check +// that the previously set global size, local size, and global offset update +// accordingly. TEST_P(NDRangeUpdateTest, Update1D) { // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, @@ -216,21 +214,22 @@ TEST_P(NDRangeUpdateTest, Update1D) { Validate(global_size, local_size, global_offset); // Set dimensions to 1 + std::array new_global_size = {9, 1, 1}; + std::array new_local_size = {3, 1, 1}; + std::array new_global_offset = {0, 0, 0}; ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - 0, // numNewExecInfos - 1, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 3, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + new_global_offset.data(), // pNewGlobalWorkOffset + new_global_size.data(), // pNewGlobalWorkSize + new_local_size.data(), // pNewLocalWorkSize }; // Reset output to remove old values which will no longer have a @@ -245,8 +244,29 @@ TEST_P(NDRangeUpdateTest, Update1D) { ASSERT_SUCCESS(urQueueFinish(queue)); // Verify that update occurred correctly - std::array new_global_size = {global_size[0], 1, 1}; - std::array new_local_size = {local_size[0], 1, 1}; - std::array new_global_offset = {global_offset[0], 0, 0}; Validate(new_global_size, new_local_size, new_global_offset); } + +// Test error code is returned if work dimension parameter changes +TEST_P(NDRangeUpdateTest, Invalid) { + const size_t new_work_dim = n_dimensions - 1; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + new_work_dim, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update command to command-buffer to use different work dim + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +} diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp index 5962bd3487..cf0259c7ab 100644 --- a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp @@ -122,12 +122,10 @@ TEST_P(USMFillCommandTest, UpdateParameters) { 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList &new_output_desc, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset updatable_execution_range_support ? &new_global_size : nullptr, // pNewGlobalWorkSize @@ -145,81 +143,6 @@ TEST_P(USMFillCommandTest, UpdateParameters) { Validate((uint32_t *)new_shared_ptr, new_global_size, new_val); } -// Test updating the kernel execution info -TEST_P(USMFillCommandTest, UpdateExecInfo) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - Validate((uint32_t *)shared_ptr, global_size, val); - - ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3]; - - // Update direct access flag - bool indirect_access = false; - new_exec_info_descs[0] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype - nullptr, // pNext - UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, // propName - sizeof(indirect_access), // propSize - nullptr, // pProperties - &indirect_access, // pPropValue - }; - - // Update cache config - ur_kernel_cache_config_t cache_config = UR_KERNEL_CACHE_CONFIG_DEFAULT; - new_exec_info_descs[1] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype - nullptr, // pNext - UR_KERNEL_EXEC_INFO_CACHE_CONFIG, // propName - sizeof(cache_config), // propSize - nullptr, // pProperties - &cache_config, // pPropValue - }; - - // Create a new USM allocation to set indirect access for - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &new_shared_ptr)); - ASSERT_NE(new_shared_ptr, nullptr); - void *pointers = {new_shared_ptr}; - new_exec_info_descs[2] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype - nullptr, // pNext - UR_KERNEL_EXEC_INFO_USM_PTRS, // propName - sizeof(pointers), // propSize - nullptr, // pProperties - &pointers, // pPropValue - }; - - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - 3, // numNewExecInfos - 0, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - new_exec_info_descs, // pNewExecInfoList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer again - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify results are correct, although exec info modifications should - // have no effect on output - Validate((uint32_t *)shared_ptr, global_size, val); -} - // Test updating a command-buffer with multiple USM fill kernel commands struct USMMultipleFillCommandTest : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { @@ -351,12 +274,10 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList &new_output_desc, // pNewPointerArgList &new_input_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp index b3f9f93fe1..8f213e8b24 100644 --- a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp @@ -138,12 +138,10 @@ TEST_P(USMSaxpyKernelTest, UpdateParameters) { 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs - 0, // numNewExecInfos 0, // newWorkDim nullptr, // pNewMemObjArgList new_input_descs, // pNewPointerArgList &new_A_desc, // pNewValueArgList - nullptr, // pNewExecInfoList nullptr, // pNewGlobalWorkOffset nullptr, // pNewGlobalWorkSize nullptr, // pNewLocalWorkSize