From 99c329bdff6081cf31f39bf53fc8b7ccae8fe4f5 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 15 Nov 2023 15:56:54 +0000 Subject: [PATCH] [EXP][Command-Buffer] Add kernel command update This change introduces a new API that allows the kernel commands of a command-buffer to be updated with a new configuration, for example modified arguments or a new ND-Range. The only implemented adapter is CUDA. See [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) as prior art. The differences between the proposed API and the above are: * Only the append kernel entry-point returns a command handle. I imagine this will be changed in future to enable other commands to be updated. * Only USM and buffer arguments can be updated; there is no equivalent update struct for `urKernelSetArgLocal`, `urKernelSetArgValue`, or `urKernelSetArgSampler`. * There is no granularity of optional support for update; an implementer must either implement all the ways to update a kernel configuration, or none of them. 
--- include/ur.py | 96 ++++- include/ur_api.h | 338 ++++++++++++------ include/ur_ddi.h | 10 +- include/ur_print.hpp | 295 +++++++++++++++ scripts/core/EXP-COMMAND-BUFFER.rst | 131 ++++++- scripts/core/exp-command-buffer.yml | 293 +++++++++++---- scripts/core/registry.yml | 3 + scripts/parse_specs.py | 4 +- source/adapters/cuda/command_buffer.cpp | 55 ++- source/adapters/cuda/command_buffer.hpp | 33 ++ source/adapters/hip/command_buffer.cpp | 11 +- source/adapters/level_zero/command_buffer.cpp | 9 +- source/adapters/native_cpu/command_buffer.cpp | 11 +- source/adapters/null/ur_nullddi.cpp | 153 ++++---- source/adapters/opencl/command_buffer.cpp | 12 +- source/loader/layers/tracing/ur_trcddi.cpp | 161 +++++---- source/loader/layers/validation/ur_valddi.cpp | 182 ++++++---- source/loader/ur_ldrddi.cpp | 171 +++++---- source/loader/ur_ldrddi.hpp | 6 + source/loader/ur_libapi.cpp | 196 ++++++---- source/ur_api.cpp | 187 ++++++---- tools/urinfo/urinfo.hpp | 6 + 22 files changed, 1770 insertions(+), 593 deletions(-) diff --git a/include/ur.py b/include/ur.py index 09b7955e07..0e9df56c6b 100644 --- a/include/ur.py +++ b/include/ur.py @@ -187,6 +187,7 @@ class ur_function_v(IntEnum): ADAPTER_RETAIN = 179 ## Enumerator for ::urAdapterRetain ADAPTER_GET_LAST_ERROR = 180 ## Enumerator for ::urAdapterGetLastError ADAPTER_GET_INFO = 181 ## Enumerator for ::urAdapterGetInfo + COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 182 ## Enumerator for ::urCommandBufferUpdateKernelLaunchExp PROGRAM_BUILD_EXP = 197 ## Enumerator for ::urProgramBuildExp PROGRAM_COMPILE_EXP = 198 ## Enumerator for ::urProgramCompileExp PROGRAM_LINK_EXP = 199 ## Enumerator for ::urProgramLinkExp @@ -250,6 +251,10 @@ class ur_structure_type_v(IntEnum): KERNEL_ARG_VALUE_PROPERTIES = 32 ## ::ur_kernel_arg_value_properties_t KERNEL_ARG_LOCAL_PROPERTIES = 33 ## ::ur_kernel_arg_local_properties_t EXP_COMMAND_BUFFER_DESC = 0x1000 ## ::ur_exp_command_buffer_desc_t + EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC = 0x1001 
## ::ur_exp_command_buffer_update_kernel_launch_desc_t + EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC = 0x1002 ## ::ur_exp_command_buffer_update_memobj_arg_desc_t + EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC = 0x1003 ## ::ur_exp_command_buffer_update_pointer_arg_desc_t + EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC = 0x1004 ## ::ur_exp_command_buffer_update_exec_info_desc_t EXP_SAMPLER_MIP_PROPERTIES = 0x2000 ## ::ur_exp_sampler_mip_properties_t EXP_INTEROP_MEM_DESC = 0x2001 ## ::ur_exp_interop_mem_desc_t EXP_INTEROP_SEMAPHORE_DESC = 0x2002 ## ::ur_exp_interop_semaphore_desc_t @@ -455,6 +460,7 @@ class ur_result_v(IntEnum): ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000 ## Invalid Command-Buffer ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001## Sync point is not valid for the command-buffer ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002 ## Sync point wait list is invalid + ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP = 0x1003## Handle to command-buffer command is invalid ERROR_UNKNOWN = 0x7ffffffe ## Unknown or internal error class ur_result_t(c_int): @@ -865,6 +871,10 @@ class ur_device_info_v(IntEnum): ## version than older devices. VIRTUAL_MEMORY_SUPPORT = 114 ## [::ur_bool_t] return true if the device supports virtual memory. ESIMD_SUPPORT = 115 ## [::ur_bool_t] return true if the device supports ESIMD. + COMMAND_BUFFER_SUPPORT_EXP = 0x1000 ## [::ur_bool_t] returns true if the device supports the use of + ## command-buffers. + COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001 ## [::ur_bool_t] returns true if the device supports updating the + ## commands in a command-buffer. 
BINDLESS_IMAGES_SUPPORT_EXP = 0x2000 ## [::ur_bool_t] returns true if the device supports the creation of ## bindless images BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001 ## [::ur_bool_t] returns true if the device supports the creation of @@ -2300,7 +2310,71 @@ class ur_exp_command_buffer_desc_t(Structure): _fields_ = [ ("stype", ur_structure_type_t), ## [in] type of this structure, must be ## ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC - ("pNext", c_void_p) ## [in][optional] pointer to extension-specific structure + ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure + ("isUpdatable", ur_c_bool_t) ## [in] Commands in a finalized command-buffer can be updated. + ] + +############################################################################### +## @brief Descriptor type for updating a kernel command memobj argument. +class ur_exp_command_buffer_update_memobj_arg_desc_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure + ("argIndex", c_ulong), ## [in] Argument index. + ("pProperties", *), ## [in][optional] Pointer to memory object properties. + ("hArgValue", ur_mem_handle_t) ## [in][optional] Handle of memory object. + ] + +############################################################################### +## @brief Descriptor type for updating a kernel command pointer argument. +class ur_exp_command_buffer_update_pointer_arg_desc_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure + ("argIndex", c_ulong), ## [in] Argument index. + ("pProperties", *), ## [in][optional] Pointer to USM pointer properties. 
+ ("pArgValue", *) ## [in][optional] USM pointer to memory location holding the argument + ## value. + ] + +############################################################################### +## @brief Descriptor type for updating kernel command execution info. +class ur_exp_command_buffer_update_exec_info_desc_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure + ("propName", ur_kernel_exec_info_t), ## [in] Name of execution attribute. + ("propSize", c_size_t), ## [in] Size of execution attribute. + ("pProperties", *), ## [in][optional] Pointer to execution info properties. + ("pPropValue", *) ## [in] Pointer to memory location holding the property value. + ] + +############################################################################### +## @brief Descriptor type for updating a kernel launch command. +class ur_exp_command_buffer_update_kernel_launch_desc_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure + ("numMemobjArgs", c_ulong), ## [in] Length of pArgMemobjList. + ("numPointerArgs", c_ulong), ## [in] Length of pArgPointerList. + ("numExecInfos", c_ulong), ## [in] Length of pArgExecInfoList. + ("workDim", c_ulong), ## [in] Number of work dimensions in the kernel ND-range, from 1-3. + ("pArgMemobjList", POINTER(ur_exp_command_buffer_update_memobj_arg_desc_t)),## [in] An array describing the new kernel mem obj arguments for the + ## command. + ("pArgPointerList", POINTER(ur_exp_command_buffer_update_pointer_arg_desc_t)), ## [in] An array describing the new kernel pointer arguments for the + ## command. 
+ ("pArgExecInfoList", POINTER(ur_exp_command_buffer_update_exec_info_desc_t)), ## [in] An array describing the execution info objects for the command. + ("pGlobalWorkOffset", POINTER(c_size_t)), ## [in] Array of workDim unsigned values that describe the offset used to + ## calculate the global ID. + ("pGlobalWorkSize", POINTER(c_size_t)), ## [in] Array of workDim unsigned values that describe the number of + ## global work-items. + ("pLocalWorkSize", POINTER(c_size_t)) ## [in] Array of workDim unsigned values that describe the number of + ## work-items that make up a work-group. If nullptr, the runtime + ## implementation will choose the work-group size. ] ############################################################################### @@ -2314,6 +2388,11 @@ class ur_exp_command_buffer_sync_point_t(c_ulong): class ur_exp_command_buffer_handle_t(c_void_p): pass +############################################################################### +## @brief Handle of a Command-Buffer command +class ur_exp_command_buffer_command_handle_t(c_void_p): + pass + ############################################################################### ## @brief The extension string which defines support for cooperative-kernels ## which is returned when querying device extensions. 
@@ -3610,9 +3689,9 @@ class ur_usm_exp_dditable_t(Structure): ############################################################################### ## @brief Function-pointer for urCommandBufferAppendKernelLaunchExp if __use_win_types: - _urCommandBufferAppendKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_command_handle_t) ) else: - _urCommandBufferAppendKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t) ) + _urCommandBufferAppendKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_kernel_handle_t, c_ulong, POINTER(c_size_t), POINTER(c_size_t), POINTER(c_size_t), c_ulong, POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_sync_point_t), POINTER(ur_exp_command_buffer_command_handle_t) ) ############################################################################### ## @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp @@ -3698,6 +3777,13 @@ class ur_usm_exp_dditable_t(Structure): else: _urCommandBufferEnqueueExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_handle_t, ur_queue_handle_t, c_ulong, POINTER(ur_event_handle_t), POINTER(ur_event_handle_t) ) +############################################################################### +## @brief Function-pointer for 
urCommandBufferUpdateKernelLaunchExp +if __use_win_types: + _urCommandBufferUpdateKernelLaunchExp_t = WINFUNCTYPE( ur_result_t, ur_exp_command_buffer_command_handle_t, POINTER(ur_exp_command_buffer_update_kernel_launch_desc_t) ) +else: + _urCommandBufferUpdateKernelLaunchExp_t = CFUNCTYPE( ur_result_t, ur_exp_command_buffer_command_handle_t, POINTER(ur_exp_command_buffer_update_kernel_launch_desc_t) ) + ############################################################################### ## @brief Table of CommandBufferExp functions pointers @@ -3719,7 +3805,8 @@ class ur_command_buffer_exp_dditable_t(Structure): ("pfnAppendMemBufferFillExp", c_void_p), ## _urCommandBufferAppendMemBufferFillExp_t ("pfnAppendUSMPrefetchExp", c_void_p), ## _urCommandBufferAppendUSMPrefetchExp_t ("pfnAppendUSMAdviseExp", c_void_p), ## _urCommandBufferAppendUSMAdviseExp_t - ("pfnEnqueueExp", c_void_p) ## _urCommandBufferEnqueueExp_t + ("pfnEnqueueExp", c_void_p), ## _urCommandBufferEnqueueExp_t + ("pfnUpdateKernelLaunchExp", c_void_p) ## _urCommandBufferUpdateKernelLaunchExp_t ] ############################################################################### @@ -4255,6 +4342,7 @@ def __init__(self, version : ur_api_version_t): self.urCommandBufferAppendUSMPrefetchExp = _urCommandBufferAppendUSMPrefetchExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMPrefetchExp) self.urCommandBufferAppendUSMAdviseExp = _urCommandBufferAppendUSMAdviseExp_t(self.__dditable.CommandBufferExp.pfnAppendUSMAdviseExp) self.urCommandBufferEnqueueExp = _urCommandBufferEnqueueExp_t(self.__dditable.CommandBufferExp.pfnEnqueueExp) + self.urCommandBufferUpdateKernelLaunchExp = _urCommandBufferUpdateKernelLaunchExp_t(self.__dditable.CommandBufferExp.pfnUpdateKernelLaunchExp) # call driver to get function pointers UsmP2PExp = ur_usm_p2p_exp_dditable_t() diff --git a/include/ur_api.h b/include/ur_api.h index 09f6d77a6b..e84225b79c 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -196,6 +196,7 @@ typedef enum 
ur_function_t { UR_FUNCTION_ADAPTER_RETAIN = 179, ///< Enumerator for ::urAdapterRetain UR_FUNCTION_ADAPTER_GET_LAST_ERROR = 180, ///< Enumerator for ::urAdapterGetLastError UR_FUNCTION_ADAPTER_GET_INFO = 181, ///< Enumerator for ::urAdapterGetInfo + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 182, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp UR_FUNCTION_PROGRAM_BUILD_EXP = 197, ///< Enumerator for ::urProgramBuildExp UR_FUNCTION_PROGRAM_COMPILE_EXP = 198, ///< Enumerator for ::urProgramCompileExp UR_FUNCTION_PROGRAM_LINK_EXP = 199, ///< Enumerator for ::urProgramLinkExp @@ -224,48 +225,52 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t - UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - 
UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - 
UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t - UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES = 0x2005, ///< ::ur_exp_layered_image_properties_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2006, ///< ::ur_exp_sampler_addr_modes_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< 
::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC = 0x1001, ///< ::ur_exp_command_buffer_update_kernel_launch_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC = 0x1002, ///< ::ur_exp_command_buffer_update_memobj_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC = 0x1003, ///< ::ur_exp_command_buffer_update_pointer_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC = 0x1004, ///< ::ur_exp_command_buffer_update_exec_info_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< 
::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_LAYERED_IMAGE_PROPERTIES = 0x2005, ///< ::ur_exp_layered_image_properties_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2006, ///< ::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -483,6 +488,7 @@ typedef enum ur_result_t { UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000, ///< Invalid Command-Buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001, ///< Sync point is not valid for the command-buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002, ///< Sync point wait list is invalid + UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP = 0x1003, ///< Handle to command-buffer command is invalid UR_RESULT_ERROR_UNKNOWN = 0x7ffffffe, ///< Unknown or internal error /// @cond UR_RESULT_FORCE_UINT32 = 0x7fffffff @@ -1530,6 +1536,10 @@ typedef enum ur_device_info_t { ///< version than older devices. UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT = 114, ///< [::ur_bool_t] return true if the device supports virtual memory. UR_DEVICE_INFO_ESIMD_SUPPORT = 115, ///< [::ur_bool_t] return true if the device supports ESIMD. + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] returns true if the device supports the use of + ///< command-buffers. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] returns true if the device supports updating the + ///< commands in a command-buffer. 
UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001, ///< [::ur_bool_t] returns true if the device supports the creation of @@ -7751,9 +7761,73 @@ typedef struct ur_exp_command_buffer_desc_t { ur_structure_type_t stype; ///< [in] type of this structure, must be ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_bool_t isUpdatable; ///< [in] Commands in a finalized command-buffer can be updated. } ur_exp_command_buffer_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command memobj argument. +typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optional] Pointer to memory object properties. + ur_mem_handle_t hArgValue; ///< [in][optional] Handle of memory object. + +} ur_exp_command_buffer_update_memobj_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command pointer argument. +typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. 
+ const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optional] Pointer to USM pointer properties. + const void *pArgValue; ///< [in][optional] USM pointer to memory location holding the argument + ///< value. + +} ur_exp_command_buffer_update_pointer_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating kernel command execution info. +typedef struct ur_exp_command_buffer_update_exec_info_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_kernel_exec_info_t propName; ///< [in] Name of execution attribute. + size_t propSize; ///< [in] Size of execution attribute. + const ur_kernel_exec_info_properties_t *pProperties; ///< [in][optional] Pointer to execution info properties. + const void *pPropValue; ///< [in] Pointer to memory location holding the property value. + +} ur_exp_command_buffer_update_exec_info_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel launch command. +typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t numMemobjArgs; ///< [in] Length of pArgMemobjList. + uint32_t numPointerArgs; ///< [in] Length of pArgPointerList. + uint32_t numExecInfos; ///< [in] Length of pArgExecInfoList. + uint32_t workDim; ///< [in] Number of work dimensions in the kernel ND-range, from 1-3. + const ur_exp_command_buffer_update_memobj_arg_desc_t *pArgMemobjList; ///< [in] An array describing the new kernel mem obj arguments for the + ///< command. 
+ const ur_exp_command_buffer_update_pointer_arg_desc_t *pArgPointerList; ///< [in] An array describing the new kernel pointer arguments for the + ///< command. + const ur_exp_command_buffer_update_exec_info_desc_t *pArgExecInfoList; ///< [in] An array describing the execution info objects for the command. + size_t *pGlobalWorkOffset; ///< [in] Array of workDim unsigned values that describe the offset used to + ///< calculate the global ID. + size_t *pGlobalWorkSize; ///< [in] Array of workDim unsigned values that describe the number of + ///< global work-items. + size_t *pLocalWorkSize; ///< [in] Array of workDim unsigned values that describe the number of + ///< work-items that make up a work-group. If nullptr, the runtime + ///< implementation will choose the work-group size. + +} ur_exp_command_buffer_update_kernel_launch_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief A value that identifies a command inside of a command-buffer, used for /// defining dependencies between commands in the same command-buffer. @@ -7763,11 +7837,15 @@ typedef uint32_t ur_exp_command_buffer_sync_point_t; /// @brief Handle of Command-Buffer object typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of a Command-Buffer command +typedef struct ur_exp_command_buffer_command_handle_t_ *ur_exp_command_buffer_command_handle_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7785,10 +7863,10 @@ typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor - ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] pointer to Command-Buffer handle + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. + ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ); /////////////////////////////////////////////////////////////////////////////// @@ -7806,7 +7884,7 @@ urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// @@ -7825,7 +7903,7 @@ urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. 
); /////////////////////////////////////////////////////////////////////////////// @@ -7844,11 +7922,11 @@ urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7875,19 +7953,20 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t *hCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7912,17 +7991,17 @@ urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7961,7 +8040,7 @@ urCommandBufferAppendUSMFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7982,7 +8061,7 @@ urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7990,11 +8069,11 @@ urCommandBufferAppendMemBufferCopyExp( size_t size, ///< [in] The number of bytes to be copied. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8016,18 +8095,18 @@ urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - const void *pSrc, ///< [in] pointer to host memory where data is to be written from. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + const void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8049,18 +8128,18 @@ urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. 
- ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being read. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8081,7 +8160,7 @@ urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t srcOrigin, ///< [in] Origin for the region of data to be copied from the source. 
@@ -8093,11 +8172,11 @@ urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, ///< [in] Slice pitch of the destination memory. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8119,26 +8198,26 @@ urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. 
+ size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. - void *pSrc, ///< [in] pointer to host memory where data is to be written from. + void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8160,25 +8239,25 @@ urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. 
ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. + size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8214,7 +8293,7 @@ urCommandBufferAppendMemBufferFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -8255,7 +8334,7 @@ urCommandBufferAppendUSMPrefetchExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -8317,17 +8396,54 @@ urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_queue_handle_t hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_queue_handle_t hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// + `NULL == pUpdateKernelLaunch->pArgMemobjList` +/// + `NULL == pUpdateKernelLaunch->pArgPointerList` +/// + `NULL == pUpdateKernelLaunch->pArgExecInfoList` +/// + `NULL == pUpdateKernelLaunch->pGlobalWorkOffset` +/// + `NULL == pUpdateKernelLaunch->pGlobalWorkSize` +/// + `NULL == pUpdateKernelLaunch->pLocalWorkSize` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. 
+); + #if !defined(__GNUC__) #pragma endregion #endif @@ -10486,6 +10602,7 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_exp_command_buffer_command_handle_t **phCommand; } ur_command_buffer_append_kernel_launch_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -10679,6 +10796,15 @@ typedef struct ur_command_buffer_enqueue_exp_params_t { ur_event_handle_t **pphEvent; } ur_command_buffer_enqueue_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferUpdateKernelLaunchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_update_kernel_launch_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; + const ur_exp_command_buffer_update_kernel_launch_desc_t **ppUpdateKernelLaunch; +} ur_command_buffer_update_kernel_launch_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 92fc742f72..f8962efab8 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1853,7 +1853,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp 
@@ -2010,6 +2011,12 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferEnqueueExp_t)( const ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferUpdateKernelLaunchExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferUpdateKernelLaunchExp_t)( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of CommandBufferExp functions pointers typedef struct ur_command_buffer_exp_dditable_t { @@ -2030,6 +2037,7 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferAppendUSMPrefetchExp_t pfnAppendUSMPrefetchExp; ur_pfnCommandBufferAppendUSMAdviseExp_t pfnAppendUSMAdviseExp; ur_pfnCommandBufferEnqueueExp_t pfnEnqueueExp; + ur_pfnCommandBufferUpdateKernelLaunchExp_t pfnUpdateKernelLaunchExp; } ur_command_buffer_exp_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/ur_print.hpp b/include/ur_print.hpp index dc7442068c..1dda761310 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -58,6 +58,8 @@ template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; template @@ -319,6 +321,10 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_layered_image_properties_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); +inline std::ostream &operator<<(std::ostream &os, 
[[maybe_unused]] const struct ur_exp_command_buffer_update_memobj_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_pointer_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_exec_info_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value); /////////////////////////////////////////////////////////////////////////////// @@ -822,6 +828,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t value) { case UR_FUNCTION_ADAPTER_GET_INFO: os << "UR_FUNCTION_ADAPTER_GET_INFO"; break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP"; + break; case UR_FUNCTION_PROGRAM_BUILD_EXP: os << "UR_FUNCTION_PROGRAM_BUILD_EXP"; break; @@ -996,6 +1005,18 @@ inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC"; + break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES"; break; @@ -1209,6 +1230,26 @@ inline 
ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: { + const ur_exp_command_buffer_update_kernel_launch_desc_t *pstruct = (const ur_exp_command_buffer_update_kernel_launch_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: { + const ur_exp_command_buffer_update_memobj_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_memobj_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: { + const ur_exp_command_buffer_update_pointer_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_pointer_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: { + const ur_exp_command_buffer_update_exec_info_desc_t *pstruct = (const ur_exp_command_buffer_update_exec_info_desc_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: { const ur_exp_sampler_mip_properties_t *pstruct = (const ur_exp_sampler_mip_properties_t *)ptr; printPtr(os, pstruct); @@ -1472,6 +1513,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_result_t value) { case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP"; break; + case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP: + os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP"; + break; case UR_RESULT_ERROR_UNKNOWN: os << "UR_RESULT_ERROR_UNKNOWN"; break; @@ -2401,6 +2445,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_device_info_t value) { case UR_DEVICE_INFO_ESIMD_SUPPORT: os << "UR_DEVICE_INFO_ESIMD_SUPPORT"; break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; + break; + case 
UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP"; break; @@ -3809,6 +3859,30 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -9137,6 +9211,198 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu ur::details::printStruct(os, (params.pNext)); + os << ", "; + os << ".isUpdatable = "; + + os << (params.isUpdatable); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_memobj_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_memobj_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_memobj_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex 
= "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".hArgValue = "; + + ur::details::printPtr(os, + (params.hArgValue)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_pointer_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_pointer_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_pointer_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pArgValue = "; + + os << (params.pArgValue); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_exec_info_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_exec_info_desc_t params) { + os << "(struct ur_exp_command_buffer_update_exec_info_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".propName = "; + + os << (params.propName); + + os << ", "; + os << ".propSize = "; + + os << (params.propSize); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pPropValue = "; + + os << (params.pPropValue); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the 
ur_exp_command_buffer_update_kernel_launch_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_kernel_launch_desc_t params) { + os << "(struct ur_exp_command_buffer_update_kernel_launch_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".numMemobjArgs = "; + + os << (params.numMemobjArgs); + + os << ", "; + os << ".numPointerArgs = "; + + os << (params.numPointerArgs); + + os << ", "; + os << ".numExecInfos = "; + + os << (params.numExecInfos); + + os << ", "; + os << ".workDim = "; + + os << (params.workDim); + + os << ", "; + os << ".pArgMemobjList = "; + + ur::details::printPtr(os, + (params.pArgMemobjList)); + + os << ", "; + os << ".pArgPointerList = "; + + ur::details::printPtr(os, + (params.pArgPointerList)); + + os << ", "; + os << ".pArgExecInfoList = "; + + ur::details::printPtr(os, + (params.pArgExecInfoList)); + + os << ", "; + os << ".pGlobalWorkOffset = "; + + ur::details::printPtr(os, + (params.pGlobalWorkOffset)); + + os << ", "; + os << ".pGlobalWorkSize = "; + + ur::details::printPtr(os, + (params.pGlobalWorkSize)); + + os << ", "; + os << ".pLocalWorkSize = "; + + ur::details::printPtr(os, + (params.pLocalWorkSize)); + os << "}"; return os; } @@ -14534,6 +14800,12 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + return os; } @@ -15233,6 +15505,26 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_update_kernel_launch_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream 
&operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_update_kernel_launch_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".pUpdateKernelLaunch = "; + + ur::details::printPtr(os, + *(params->ppUpdateKernelLaunch)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t type /// @returns @@ -16367,6 +16659,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { os << (const struct ur_command_buffer_enqueue_exp_params_t *)params; } break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: { + os << (const struct ur_command_buffer_update_kernel_launch_exp_params_t *)params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..823d186ec2 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -57,24 +57,29 @@ returned list of supported extensions. ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, 0, nullptr, &returnedSize); - // Retrieve extension string + // Retrieve extension string std::unique_ptr returnedExtensions(new char[returnedSize]); - ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, + ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, returnedExtensions.get(), nullptr); - + std::string_view ExtensionsString(returnedExtensions.get()); - bool CmdBufferSupport = + bool CmdBufferSupport = ExtensionsString.find(${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP) != std::string::npos; +.. 
note:: + The ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP device info query exists to + serve the same purpose as ${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP. + Command-Buffer Creation -------------------------------------------------------------------------------- Command-Buffers are tied to a specific ${x}_context_handle_t and ${x}_device_handle_t. ${x}CommandBufferCreateExp optionally takes a descriptor to provide additional properties for how the command-buffer should be -constructed. There are currently no unique members defined for -${x}_exp_command_buffer_desc_t, however they may be added in the future. +constructed. The only unique member defined in ${x}_exp_command_buffer_desc_t +is ``isUpdatable``, which should be set to ``true`` to support :ref:`updating +command-buffer commands`. Command-buffers are reference counted and can be retained and released by calling ${x}CommandBufferRetainExp and ${x}CommandBufferReleaseExp respectively. @@ -89,6 +94,11 @@ However, they differ in that they take a command-buffer handle instead of a queue handle, and the dependencies and return parameters are sync-points instead of event handles. +The entry-point for appending a kernel launch command also returns an optional +handle to the command being appended. Returning this handle does not extend the +lifetime of the parent command-buffer, and using the handle after the +command-buffer has been destroyed is invalid behaviour. + Currently only the following commands are supported: * ${x}CommandBufferAppendKernelLaunchExp @@ -103,9 +113,9 @@ Currently only the following commands are supported: * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp - + It is planned to eventually support any command type from the Core API which can -actually be appended to the equiavalent adapter native constructs. +actually be appended to the equivalent adapter native constructs. 
Sync-Points -------------------------------------------------------------------------------- @@ -122,15 +132,15 @@ were obtained from. // Append a memcpy with no sync-point dependencies ${x}_exp_command_buffer_sync_point_t syncPoint; - ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, + ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, nullptr, &syncPoint); - + // Append a kernel launch with syncPoint as a dependency, ignore returned // sync-point - ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 1, &syncPoint, - nullptr); + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 1, &syncPoint, + nullptr, nullptr); Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -147,6 +157,79 @@ enqueued or executed simultaneously, and submissions may be serialized. ${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, &executionEvent); +Updating Command-Buffers Commands +-------------------------------------------------------------------------------- + +An adapter implementing the command-buffer experimental feature can optionally +support updating the configuration of kernel commands recorded to a +command-buffer. Support of this is reported by returning true in the +${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. + +Updating a kernel command is done by passing the new kernel configuration +to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of +the kernel command to update. Configurations that can be changed are the +kernel's ND-Range and parameters. + +.. parsed-literal:: + + // Create a command-buffer with update enabled.
+ ${x}_exp_command_buffer_desc_t desc { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, + nullptr, + true + }; + ${x}_exp_command_buffer_handle_t hCommandBuffer; + ${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer); + ${x}_exp_command_buffer_command_handle_t handle; + // Append a kernel command which has two buffer parameters, an input + // and an output. + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 0, nullptr, + nullptr, &handle); + + // Close the command-buffer before updating + ${x}CommandBufferFinalizeExp(hCommandBuffer); + + // Define kernel argument at index 0 to be a new input buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newInputArg { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext, + 0, // argIndex, + nullptr, // pProperties + newInputBuffer, // hArgValue + }; + + // Define kernel argument at index 1 to be a new output buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newOutputArg { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext, + 1, // argIndex, + nullptr, // pProperties + newOutputBuffer, // hArgValue + }; + + // Define the new configuration of the kernel command + ${x}_exp_command_buffer_update_memobj_arg_desc_t updatedArgs[2] = {newInputArg, newOutputArg}; + ${x}_exp_command_buffer_update_kernel_launch_desc_t update { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numMemobjArgs + 0, // numPointerArgs + 0, // numExecInfos + 0, // workDim; + updatedArgs, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Perform the update + ${x}CommandBufferUpdateKernelLaunchExp(handle, &update); + + API -------------------------------------------------------------------------------- @@ -156,12 +239,20 @@ Macros Enums
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}_device_info_t + * ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP + * ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP * ${x}_result_t * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP + * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP * ${x}_structure_type_t * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC * ${x}_command_t * ${X}_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP * ${x}_function_t @@ -182,15 +273,18 @@ Enums * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP - - + * ${X}_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP Types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_exp_command_buffer_desc_t +* ${x}_exp_command_buffer_update_kernel_launch_desc_t +* ${x}_exp_command_buffer_update_memobj_arg_desc_t +* ${x}_exp_command_buffer_update_pointer_arg_desc_t +* ${x}_exp_command_buffer_update_exec_info_desc_t * ${x}_exp_command_buffer_sync_point_t * ${x}_exp_command_buffer_handle_t - +* ${x}_exp_command_buffer_command_handle_t Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -211,6 +305,7 @@ Functions * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp +* ${x}CommandBufferUpdateKernelLaunchExp Changelog -------------------------------------------------------------------------------- @@ -227,6 +322,8 @@ Changelog | 1.3 | Add function 
definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ +| 1.4 | Add function definitions for kernel command update | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 7d1b686aab..4257713c69 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -11,6 +11,20 @@ type: header desc: "Intel $OneApi Unified Runtime Experimental APIs for Command-Buffers" ordinal: "99" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support command-buffers." +name: $x_device_info_t +etors: + - name: COMMAND_BUFFER_SUPPORT_EXP + value: "0x1000" + desc: "[$x_bool_t] returns true if the device supports the use of command-buffers." + - name: COMMAND_BUFFER_UPDATE_SUPPORT_EXP + value: "0x1001" + desc: "[$x_bool_t] returns true if the device supports updating the commands in a command-buffer."
+ --- #-------------------------------------------------------------------------- type: enum extend: true @@ -26,6 +40,9 @@ etors: - name: ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP value: "0x1002" desc: "Sync point wait list is invalid" + - name: ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + value: "0x1003" + desc: "Handle to command-buffer command is invalid" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -35,6 +52,19 @@ etors: - name: EXP_COMMAND_BUFFER_DESC desc: $x_exp_command_buffer_desc_t value: "0x1000" + - name: EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + desc: $x_exp_command_buffer_update_kernel_launch_desc_t + value: "0x1001" + - name: EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + desc: $x_exp_command_buffer_update_memobj_arg_desc_t + value: "0x1002" + - name: EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + desc: $x_exp_command_buffer_update_pointer_arg_desc_t + value: "0x1003" + - name: EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + desc: $x_exp_command_buffer_update_exec_info_desc_t + value: "0x1004" + --- #-------------------------------------------------------------------------- type: enum extend: true @@ -54,7 +84,95 @@ type: struct desc: "Command-Buffer Descriptor Type" name: $x_exp_command_buffer_desc_t base: $x_base_desc_t -members: [] +members: + - type: $x_bool_t + name: isUpdatable + desc: "[in] Commands in a finalized command-buffer can be updated." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command memobj argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_memobj_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: "const ur_kernel_arg_mem_obj_properties_t *" + name: pProperties + desc: "[in][optinal] Pointer to memory object properties." 
+ - type: $x_mem_handle_t + name: hArgValue + desc: "[in][optional] Handle of memory object." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command pointer argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_pointer_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: "const ur_kernel_arg_pointer_properties_t *" + name: pProperties + desc: "[in][optinal] Pointer to USM pointer properties." + - type: "const void *" + name: pArgValue + desc: "[in][optional] USM pointer to memory location holding the argument value." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating kernel command execution info." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_exec_info_desc_t +members: + - type: ur_kernel_exec_info_t + name: propName + desc: "[in] Name of execution attribute." + - type: size_t + name: propSize + desc: "[in] Size of execution attribute." + - type: "const ur_kernel_exec_info_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to execution info properties." + - type: "const void *" + name: pPropValue + desc: "[in] Pointer to memory location holding the property value." + +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel launch command." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_kernel_launch_desc_t +members: + - type: uint32_t + name: numMemobjArgs + desc: "[in] Length of pArgMemobjList." + - type: uint32_t + name: numPointerArgs + desc: "[in] Length of pArgPointerList." + - type: uint32_t + name: numExecInfos + desc: "[in] Length of pExecInfoList." + - type: uint32_t + name: workDim + desc: "[in] Number of work dimensions in the kernel ND-range, from 1-3." 
+ - type: "const $x_exp_command_buffer_update_memobj_arg_desc_t*" + name: pArgMemobjList + desc: "[in] An array describing the new kernel mem obj arguments for the command." + - type: "const $x_exp_command_buffer_update_pointer_arg_desc_t*" + name: pArgPointerList + desc: "[in] An array describing the new kernel pointer arguments for the command." + - type: "const $x_exp_command_buffer_update_exec_info_desc_t*" + name: pArgExecInfoList + desc: "[in] An array describing the execution info objects for the command." + - type: "size_t*" + name: pGlobalWorkOffset + desc: "[in] Array of workDim unsigned values that describe the offset used to calculate the global ID." + - type: "size_t*" + name: pGlobalWorkSize + desc: "[in] Array of workDim unsigned values that describe the number of global work-items." + - type: "size_t*" + name: pLocalWorkSize + desc: "[in] Array of workDim unsigned values that describe the number of work-items that make up a work-group. If nullptr, the runtime implementation will choose the work-group size." --- #-------------------------------------------------------------------------- type: typedef desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer." @@ -67,26 +185,31 @@ desc: "Handle of Command-Buffer object" class: $xCommandBuffer name: "$x_exp_command_buffer_handle_t" --- #-------------------------------------------------------------------------- +type: handle +desc: "Handle of a Command-Buffer command" +class: $xCommandBuffer +name: "$x_exp_command_buffer_command_handle_t" +--- #-------------------------------------------------------------------------- type: function desc: "Create a Command-Buffer object" class: $xCommandBuffer name: CreateExp decl: static details: - - "Create a command-buffer object" + - "Create a command-buffer object." 
params: - type: $x_context_handle_t name: hContext - desc: "[in] handle of the context object" + desc: "[in] Handle of the context object." - type: $x_device_handle_t name: hDevice - desc: "[in] handle of the device object" + desc: "[in] Handle of the device object." - type: "const $x_exp_command_buffer_desc_t*" name: pCommandBufferDesc - desc: "[in][optional] CommandBuffer descriptor" + desc: "[in][optional] command-buffer descriptor." - type: "$x_exp_command_buffer_handle_t*" name: phCommandBuffer - desc: "[out] pointer to Command-Buffer handle" + desc: "[out] Pointer to command-Buffer handle." returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE @@ -100,7 +223,7 @@ name: RetainExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -113,7 +236,7 @@ name: ReleaseExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -126,26 +249,26 @@ name: FinalizeExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a kernel execution command to a command-buffer object" +desc: "Append a kernel execution command to a command-buffer object." 
class: $xCommandBuffer name: AppendKernelLaunchExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." - type: $x_kernel_handle_t name: hKernel - desc: "[in] kernel to append" + desc: "[in] Kernel to append." - type: uint32_t name: workDim - desc: "[in] dimension of the kernel execution" + desc: "[in] Dimension of the kernel execution." - type: "const size_t*" name: pGlobalWorkOffset desc: "[in] Offset to use when executing kernel." @@ -163,7 +286,11 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." + returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_KERNEL @@ -178,13 +305,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM memcpy command to a command-buffer object" +desc: "Append a USM memcpy command to a command-buffer object." class: $xCommandBuffer name: AppendUSMMemcpyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: "void*" name: pDst desc: "[in] Location the data will be copied to." @@ -193,7 +320,7 @@ params: desc: "[in] The data to be copied." - type: "size_t" name: size - desc: "[in] The number of bytes to copy" + desc: "[in] The number of bytes to copy." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." 
@@ -202,7 +329,8 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." + returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_SIZE: @@ -217,7 +345,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM fill command to a command-buffer object" +desc: "Append a USM fill command to a command-buffer object." class: $xCommandBuffer name: AppendUSMFillExp params: @@ -262,13 +390,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory copy command to a command-buffer object" +desc: "Append a memory copy command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferCopyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -292,7 +420,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -304,25 +432,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory write command to a command-buffer object" +desc: "Append a memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "const void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -331,7 +459,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -343,25 +471,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory read command to a command-buffer object" +desc: "Append a memory read command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferReadExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be written to." + desc: "[in] Pointer to host memory where data is to be written to." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -370,7 +498,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -382,13 +510,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory copy command to a command-buffer object" +desc: "Append a rectangular memory copy command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferCopyRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -424,7 +552,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -436,16 +564,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory write command to a command-buffer object" +desc: "Append a rectangular memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -457,19 +585,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." 
- type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being written." + desc: "[in] Length of each 2D slice in bytes in the buffer object being written." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pSrc." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pSrc." - type: "void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -478,7 +606,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -490,16 +618,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory read command to a command-buffer object" +desc: "Append a rectangular memory read command to a command-buffer object." 
class: $xCommandBuffer name: AppendMemBufferReadRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -511,19 +639,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." - type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being read." + desc: "[in] Length of each 2D slice in bytes in the buffer object being read." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pDst." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pDst." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be read into." + desc: "[in] Pointer to host memory where data is to be read into." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -532,7 +660,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -544,7 +672,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory fill command to a command-buffer object" +desc: "Append a memory fill command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferFillExp params: @@ -588,12 +716,12 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Prefetch command to a command-buffer object" +desc: "Append a USM Prefetch command to a command-buffer object." class: $xCommandBuffer name: AppendUSMPrefetchExp details: - - "Prefetching may not be supported for all devices or allocation types. If memory prefetching - is not supported, the prefetch hint will be ignored." + - "Prefetching may not be supported for all devices or allocation types. If + memory prefetching is not supported, the prefetch hint will be ignored." params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -630,12 +758,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Advise command to a command-buffer object" +desc: "Append a USM Advise command to a command-buffer object." class: $xCommandBuffer name: AppendUSMAdviseExp details: - - "Not all memory advice hints may be supported for all devices or allocation types. - If a memory advice hint is not supported, it will be ignored." + - "Not all memory advice hints may be supported for all devices or + allocation types. If a memory advice hint is not supported, it will be + ignored." 
params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -678,18 +807,18 @@ name: EnqueueExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_queue_handle_t name: hQueue - desc: "[in] the queue to submit this command-buffer for execution." + desc: "[in] The queue to submit this command-buffer for execution." - type: uint32_t name: numEventsInWaitList - desc: "[in] size of the event wait list" + desc: "[in] Size of the event wait list." - type: "const $x_event_handle_t*" name: phEventWaitList desc: | [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command-buffer execution. - If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. - type: $x_event_handle_t* name: phEvent desc: | @@ -704,3 +833,31 @@ returns: - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Update a kernel launch command." +class: $xCommandBuffer +name: UpdateKernelLaunchExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer kernel command to update." + - type: "const $x_exp_command_buffer_update_kernel_launch_desc_t*" + name: pUpdateKernelLaunch + desc: "[in] Handle of the command-buffer kernel command to update." + +returns: + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If update functionality is not supported by the device." + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." 
+ - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE + - $X_RESULT_ERROR_INVALID_ENUMERATION + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index deb5ee9604..8ee78f952f 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -502,6 +502,9 @@ etors: - name: ADAPTER_GET_INFO desc: Enumerator for $xAdapterGetInfo value: '181' +- name: COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP + desc: Enumerator for $xCommandBufferUpdateKernelLaunchExp + value: '182' - name: PROGRAM_BUILD_EXP desc: Enumerator for $xProgramBuildExp value: '197' diff --git a/scripts/parse_specs.py b/scripts/parse_specs.py index a1477ce534..0ace863d7f 100644 --- a/scripts/parse_specs.py +++ b/scripts/parse_specs.py @@ -286,8 +286,8 @@ def __validate_members(d, tags): if not annotation: raise Exception(prefix+"'desc' must start with {'[in]', '[out]', '[in,out]'}") - if type_traits.is_handle(item['type']): - raise Exception(prefix+"'type' must not be '*_handle_t': %s"%item['type']) + #if type_traits.is_handle(item['type']): + # raise Exception(prefix+"'type' must not be '*_handle_t': %s"%item['type']) if item['type'].endswith("flag_t"): raise Exception(prefix+"'type' must not be '*_flag_t': %s"%item['type']) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 24a5d9497c..7825268ae9 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -165,7 +165,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const 
ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand) { // Preconditions UR_ASSERT(hCommandBuffer->Context == hKernel->getContext(), UR_RESULT_ERROR_INVALID_KERNEL); @@ -239,8 +240,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel->clearLocalSize(); // Get sync point and register the cuNode with it. - *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared<CUgraphNode>(GraphNode)); + auto NodeSP = std::make_shared<CUgraphNode>(GraphNode); + *pSyncPoint = hCommandBuffer->AddSyncPoint(NodeSP); + + *phCommand = + hCommandBuffer->AddCommandHandle(hKernel, NodeSP, NodeParams).get(); } catch (ur_result_t Err) { Result = Err; } @@ -564,3 +568,48 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return Result; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t *pKernelLaunch) { + + ur_kernel_handle_t Kernel = hCommand->Kernel; + CUfunction CuFunc = Kernel->get(); + uint32_t LocalSize = Kernel->getLocalSize(); + + ur_context_handle_t Context = hCommand->CommandBuffer->Context; + ur_device_handle_t Device = hCommand->CommandBuffer->Device; + size_t *GlobalWorkOffset = pKernelLaunch->pGlobalWorkOffset; + size_t *GlobalWorkSize = pKernelLaunch->pGlobalWorkSize; + size_t *LocalWorkSize = pKernelLaunch->pLocalWorkSize; + uint32_t WorkDim = pKernelLaunch->workDim; + + // Set the number of threads per block to the number of threads per warp + // by default unless user has provided a better number + size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; + size_t BlocksPerGrid[3] = {1u, 1u, 1u}; + + auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset, + GlobalWorkSize, LocalWorkSize, Kernel, CuFunc, + ThreadsPerBlock, BlocksPerGrid); + if (Result !=
UR_RESULT_SUCCESS) { + return Result; + } + + CUDA_KERNEL_NODE_PARAMS &Params = hCommand->Params; + Params.func = CuFunc; + Params.gridDimX = BlocksPerGrid[0]; + Params.gridDimY = BlocksPerGrid[1]; + Params.gridDimZ = BlocksPerGrid[2]; + Params.blockDimX = ThreadsPerBlock[0]; + Params.blockDimY = ThreadsPerBlock[1]; + Params.blockDimZ = ThreadsPerBlock[2]; + Params.sharedMemBytes = LocalSize; + + // TODO update arguments + // Params.kernelParams = const_cast<void **>(ArgIndices.data()); + + CUgraphNode Node = *(hCommand->Node); + UR_CHECK_ERROR(cuGraphKernelNodeSetParams(Node, &Params)); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 4ceab42062..48cbb24494 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -175,6 +175,22 @@ static inline const char *getUrResultString(ur_result_t Result) { fprintf(stderr, "UR <--- %s(%s)\n", #Call, getUrResultString(Result)); \ } +// Handle type specific to kernel command. Will need to +// be refactored when handles can be returned from other +// command types. +struct ur_exp_command_buffer_command_handle_t_ { + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, + std::shared_ptr<CUgraphNode> Node, CUDA_KERNEL_NODE_PARAMS Params) + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), + Params(Params) {} + + ur_exp_command_buffer_handle_t CommandBuffer; + ur_kernel_handle_t Kernel; + std::shared_ptr<CUgraphNode> Node; + CUDA_KERNEL_NODE_PARAMS Params; +}; + struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext, @@ -202,6 +218,20 @@ struct ur_exp_command_buffer_handle_t_ { return SyncPoint; } + // Creates a UR command handle + // @param[in] Kernel UR kernel associated with this command. + // @param[in] Node CUDA Graph node associated with this command.
+ // @param[in] Params Kernel configuration associated with this node. + // @return Shared pointer to the created handle. + std::shared_ptr<ur_exp_command_buffer_command_handle_t_> + AddCommandHandle(ur_kernel_handle_t Kernel, std::shared_ptr<CUgraphNode> Node, + const CUDA_KERNEL_NODE_PARAMS &Params) { + + Handles.push_back(std::make_shared<ur_exp_command_buffer_command_handle_t_>( + this, Kernel, Node, Params)); + return Handles.back(); + } + // UR context associated with this command-buffer ur_context_handle_t Context; // Device associated with this command buffer @@ -222,6 +252,9 @@ struct ur_exp_command_buffer_handle_t_ { // is not enough) ur_exp_command_buffer_sync_point_t NextSyncPoint; + // List of command handles returned to the user. + std::vector<std::shared_ptr<ur_exp_command_buffer_command_handle_t_>> Handles; + // Used when retaining an object. uint32_t incrementReferenceCount() noexcept { return ++RefCount; } // Used when releasing an object. diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index d2cd156719..2c857f4f1a 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -46,7 +46,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -129,3 +130,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for the HIP
adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index e8f3b061f9..6f03030be8 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -475,7 +475,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *GlobalWorkSize, const size_t *LocalWorkSize, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + ur_exp_command_buffer_sync_point_t *SyncPoint, + ur_exp_command_buffer_command_handle_t *) { // Lock automatically releases when this goes out of scope. std::scoped_lock<ur_shared_mutex, ur_shared_mutex> Lock( Kernel->Mutex, Kernel->Program->Mutex); @@ -780,3 +781,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index f13a57f392..32d4a1dc49 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -50,7 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -133,3 +134,11 @@ UR_APIEXPORT ur_result_t
UR_APICALL urCommandBufferEnqueueExp( "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for the NativeCPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index a4e91e3dc0..46137a76fd 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -4419,12 +4419,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4448,7 +4448,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4469,7 +4469,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4490,7 +4490,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4511,9 +4511,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. 
const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -4524,8 +4524,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *hCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4536,9 +4538,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, hCommand); } else { // generic implementation + if (nullptr != hCommand) { + *hCommand = + reinterpret_cast<ur_exp_command_buffer_command_handle_t>( + d_context.get()); + } } return result; @@ -4550,16 +4557,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list.
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4618,7 +4625,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -4628,8 +4635,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4653,18 +4660,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. 
- ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4688,17 +4695,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. 
+ void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4722,7 +4729,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -4739,8 +4746,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4765,31 +4772,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4814,29 +4821,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4963,15 +4970,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
@@ -4995,6 +5001,30 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnUpdateKernelLaunchExp = + d_context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr != pfnUpdateKernelLaunchExp) { + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -5459,6 +5489,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = + driver::urCommandBufferUpdateKernelLaunchExp; + return result; } catch (...)
{ return exceptionToResult(std::current_exception()); diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 56b4d16b88..6da372dbc9 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -104,7 +104,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *) { cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -322,3 +323,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, + [[maybe_unused]] const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index d33a3aaf51..2d97033fec 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5026,12 +5026,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the
device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -5058,7 +5058,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -5083,7 +5083,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -5108,7 +5108,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -5134,9 +5134,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -5147,8 +5147,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *hCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -5166,15 +5168,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pLocalWorkSize, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &pSyncPoint, + &hCommand}; uint64_t instance = context.notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", &params); ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + hCommand); context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", &params, &result, @@ -5187,16 +5190,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command.
) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -5270,7 +5273,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -5280,8 +5283,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -5319,18 +5322,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. 
+ ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -5367,17 +5370,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -5414,7 +5417,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -5431,8 +5434,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -5476,31 +5479,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -5544,29 +5547,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. 
size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -5750,15 +5753,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -5785,6 +5787,37 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_update_kernel_launch_exp_params_t params = { + &hCommand, &pUpdateKernelLaunch}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", &params); + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", &params, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -6365,6 +6398,10 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = 
+ ur_tracing_layer::urCommandBufferUpdateKernelLaunchExp; + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index a307bb37de..843e083ad7 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -6615,12 +6615,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] Command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -6652,7 +6652,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -6679,7 +6679,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -6706,7 +6706,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -6729,9 +6729,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6742,8 +6742,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *hCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -6784,8 +6786,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + hCommand); return result; } @@ -6794,16 +6796,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -6918,7 +6920,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6928,8 +6930,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -6971,18 +6973,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
- size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -7024,17 +7026,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. 
+ void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -7076,7 +7078,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -7093,8 +7095,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -7137,31 +7139,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -7204,29 +7206,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. 
size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -7433,15 +7435,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7484,6 +7485,61 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Descriptor specifying the updated configuration of the kernel command. +) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pUpdateKernelLaunch) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pUpdateKernelLaunch->pArgMemobjList) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pUpdateKernelLaunch->pArgPointerList) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pUpdateKernelLaunch->pArgExecInfoList) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pUpdateKernelLaunch->pGlobalWorkOffset) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pUpdateKernelLaunch->pGlobalWorkSize) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == pUpdateKernelLaunch->pLocalWorkSize) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief
Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -8126,6 +8182,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_validation_layer::urCommandBufferUpdateKernelLaunchExp; + return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 9327f349c5..24209a7efc 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -32,6 +32,7 @@ ur_exp_image_mem_factory_t ur_exp_image_mem_factory; ur_exp_interop_mem_factory_t ur_exp_interop_mem_factory; ur_exp_interop_semaphore_factory_t ur_exp_interop_semaphore_factory; ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; +ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet @@ -6171,12 +6172,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. 
ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6217,7 +6218,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6245,7 +6246,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6273,7 +6274,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6301,9 +6302,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. 
+ uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6314,8 +6315,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *hCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6341,7 +6344,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( result = pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, hCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != hCommand) { + *hCommand = + reinterpret_cast( + ur_exp_command_buffer_command_factory.getInstance( + *hCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -6350,16 +6369,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. 
const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6432,7 +6451,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6442,8 +6461,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6480,18 +6499,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6525,17 +6544,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. 
- size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6569,7 +6588,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6586,8 +6605,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6625,31 +6644,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6684,29 +6703,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. 
size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6867,15 +6886,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -6928,6 +6946,37 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Descriptor specifying the updated configuration of the kernel command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast<ur_exp_command_buffer_command_object_t *>(hCommand) + ->dditable; + auto pfnUpdateKernelLaunchExp = + dditable->ur.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast<ur_exp_command_buffer_command_object_t *>(hCommand) + ->handle; + + // forward to device-platform + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -7543,6 +7592,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_loader::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = ur_loader::urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_loader::urCommandBufferUpdateKernelLaunchExp; } else { // return pointers directly to platform's DDIs *pDdiTable = ur_loader::context->platforms.front() diff --git
a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp index 4edbabbd8b..d98b99a655 100644 --- a/source/loader/ur_ldrddi.hpp +++ b/source/loader/ur_ldrddi.hpp @@ -92,6 +92,12 @@ using ur_exp_command_buffer_factory_t = singleton_factory_t<ur_exp_command_buffer_object_t, ur_exp_command_buffer_handle_t>; +using ur_exp_command_buffer_command_object_t = + object_t<ur_exp_command_buffer_command_handle_t>; +using ur_exp_command_buffer_command_factory_t = + singleton_factory_t<ur_exp_command_buffer_command_object_t, + ur_exp_command_buffer_command_handle_t>; + } // namespace ur_loader #endif /* UR_LOADER_LDRDDI_H */ diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index de9e029536..d801d0f321 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -6985,7 +6985,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7002,12 +7002,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] Pointer to command-Buffer handle.
) try { auto pfnCreateExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnCreateExp; @@ -7035,7 +7035,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnRetainExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnRetainExp; @@ -7064,7 +7064,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnReleaseExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -7093,7 +7093,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnFinalizeExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -7107,7 +7107,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7134,9 +7134,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -7147,8 +7147,10 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *hCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendKernelLaunchExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -7159,13 +7161,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, hCommand); } catch (...) 
{ return exceptionToResult(std::current_exception()); } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7190,16 +7192,16 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendUSMMemcpyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7215,7 +7217,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7270,7 +7272,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7291,7 +7293,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7301,8 +7303,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferCopyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -7318,7 +7320,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7340,18 +7342,18 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -7367,7 +7369,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7389,17 +7391,17 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -7415,7 +7417,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7436,7 +7438,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -7453,8 +7455,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferCopyRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7472,7 +7474,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7494,31 +7496,31 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. 
+ ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { auto pfnAppendMemBufferWriteRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7536,7 +7538,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7558,29 +7560,29 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. 
- void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7598,7 +7600,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7650,7 +7652,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -7706,7 +7708,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. 
/// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -7783,15 +7785,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7808,6 +7809,55 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// + `NULL == pUpdateKernelLaunch->pArgMemobjList` +/// + `NULL == pUpdateKernelLaunch->pArgPointerList` +/// + `NULL == pUpdateKernelLaunch->pArgExecInfoList` +/// + `NULL == pUpdateKernelLaunch->pGlobalWorkOffset` +/// + `NULL == pUpdateKernelLaunch->pGlobalWorkSize` +/// + `NULL == pUpdateKernelLaunch->pLocalWorkSize` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. 
+ ) try { + auto pfnUpdateKernelLaunchExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/source/ur_api.cpp b/source/ur_api.cpp index ca1f82019c..1b92cb1b9d 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -5921,7 +5921,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -5938,12 +5938,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -5964,7 +5964,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -5986,7 +5986,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6008,14 +6008,14 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6042,9 +6042,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. 
+ uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6055,15 +6055,17 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *hCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6088,23 +6090,23 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6150,7 +6152,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6171,7 +6173,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6181,15 +6183,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6211,25 +6213,25 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6251,24 +6253,24 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6289,7 +6291,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6306,15 +6308,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6336,38 +6338,38 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6389,36 +6391,36 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6461,7 +6463,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -6508,7 +6510,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -6576,15 +6578,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -6593,6 +6594,48 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// + `NULL == pUpdateKernelLaunch->pArgMemobjList` +/// + `NULL == pUpdateKernelLaunch->pArgPointerList` +/// + `NULL == pUpdateKernelLaunch->pArgExecInfoList` +/// + `NULL == pUpdateKernelLaunch->pGlobalWorkOffset` +/// + `NULL == pUpdateKernelLaunch->pGlobalWorkSize` +/// + `NULL == pUpdateKernelLaunch->pLocalWorkSize` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. 
+/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index d9677c3eab..b959e565c4 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -323,6 +323,12 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_ESIMD_SUPPORT); std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP); std::cout << prefix;