From d71022506712ce80db70c8b838438e1ce646e19d Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 15 Nov 2023 15:56:54 +0000 Subject: [PATCH] [EXP][Command-Buffer] Add kernel command update This change introduces a new API that allows the kernel commands of a command-buffer to be updated with a new configuration, for example modified arguments or ND-Range. The new API is defined in the following files and then source generated using scripts, so reviewers should look at: * `scripts/core/EXP-COMMAND-BUFFER.rst` * `scripts/core/exp-command-buffer.yml` See [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) as prior art. The differences between the proposed API and the above are: * Only the append kernel entry-point returns a command handle. I imagine this will be changed in future to enable other commands to be updated. * USM, buffer, and scalar arguments can be updated, but there is no equivalent update struct for `urKernelSetArgLocal` or `urKernelSetArgSampler`. * There is no granularity of optional support for update; an implementer must either implement all the ways to update a kernel configuration, or none of them. * Command-handles are reference counted in UR, and extend the lifetime of the parent command-buffer. The CUDA adapter is the only adapter that currently implements this new feature; other adapters don't report support. This is because CUDA is already an adapter supported by UR command-buffers, and the CUDA API for updating nodes already exists as a non-optional feature. Reviewers should review the changes in `source/adapters/cuda/` to evaluate this. CTS tests are written to verify the implementation, as there is not yet a DPC++ feature with testing to stress the code path (see https://github.com/reble/llvm/pull/340 for how that feature could look).
A new test directory has been created to test the command-buffer experimental feature, `test/conformance/exp_command_buffer`, which contains tests to stress using the feature defined by this extension so that it has code coverage. Reviewers should look at the new tests added here, and new device kernels in `test/conformance/device_code` to evaluate these changes. --- include/ur_api.h | 526 ++++++++++++--- include/ur_ddi.h | 42 +- include/ur_print.h | 96 +++ include/ur_print.hpp | 608 ++++++++++++++++++ scripts/core/EXP-COMMAND-BUFFER.rst | 145 ++++- scripts/core/exp-command-buffer.yml | 432 +++++++++++-- scripts/core/registry.yml | 15 + source/adapters/cuda/command_buffer.cpp | 317 ++++++++- source/adapters/cuda/command_buffer.hpp | 115 +++- source/adapters/cuda/device.cpp | 4 + source/adapters/cuda/ur_interface_loader.cpp | 5 + source/adapters/hip/command_buffer.cpp | 31 +- source/adapters/hip/device.cpp | 4 + source/adapters/hip/ur_interface_loader.cpp | 5 + source/adapters/level_zero/command_buffer.cpp | 41 +- source/adapters/level_zero/device.cpp | 5 +- .../level_zero/ur_interface_loader.cpp | 5 + source/adapters/native_cpu/command_buffer.cpp | 31 +- source/adapters/native_cpu/device.cpp | 5 + .../native_cpu/ur_interface_loader.cpp | 5 + source/adapters/null/ur_nullddi.cpp | 268 ++++++-- source/adapters/opencl/command_buffer.cpp | 67 +- source/adapters/opencl/common.hpp | 6 + source/adapters/opencl/device.cpp | 18 + .../adapters/opencl/ur_interface_loader.cpp | 5 + source/loader/layers/tracing/ur_trcddi.cpp | 304 +++++++-- source/loader/layers/validation/ur_valddi.cpp | 320 +++++++-- source/loader/ur_ldrddi.cpp | 341 ++++++++-- source/loader/ur_ldrddi.hpp | 6 + source/loader/ur_libapi.cpp | 344 +++++++--- source/loader/ur_print.cpp | 96 +++ source/ur_api.cpp | 305 ++++++--- test/conformance/CMakeLists.txt | 1 + test/conformance/device_code/CMakeLists.txt | 3 + test/conformance/device_code/indexers_usm.cpp | 38 ++ test/conformance/device_code/saxpy.cpp | 33 + 
test/conformance/device_code/saxpy_usm.cpp | 25 + .../exp_command_buffer/CMakeLists.txt | 15 + .../buffer_fill_kernel_update.cpp | 404 ++++++++++++ .../buffer_saxpy_kernel_update.cpp | 178 +++++ .../exp_command_buffer_adapter_cuda.match | 0 .../exp_command_buffer_adapter_hip.match | 18 + ...xp_command_buffer_adapter_level_zero.match | 0 ...xp_command_buffer_adapter_native_cpu.match | 1 + .../exp_command_buffer_adapter_opencl.match | 0 .../conformance/exp_command_buffer/fixtures.h | 175 +++++ .../exp_command_buffer/invalid_update.cpp | 161 +++++ .../exp_command_buffer/ndrange_update.cpp | 248 +++++++ .../exp_command_buffer/release.cpp | 74 +++ .../conformance/exp_command_buffer/retain.cpp | 56 ++ .../usm_fill_kernel_update.cpp | 378 +++++++++++ .../usm_saxpy_kernel_update.cpp | 164 +++++ test/conformance/testing/include/uur/utils.h | 25 + tools/urinfo/urinfo.hpp | 6 + 54 files changed, 5893 insertions(+), 627 deletions(-) create mode 100644 test/conformance/device_code/indexers_usm.cpp create mode 100644 test/conformance/device_code/saxpy.cpp create mode 100644 test/conformance/device_code/saxpy_usm.cpp create mode 100644 test/conformance/exp_command_buffer/CMakeLists.txt create mode 100644 test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp create mode 100644 test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match create mode 100644 test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match create mode 100644 test/conformance/exp_command_buffer/fixtures.h create mode 100644 test/conformance/exp_command_buffer/invalid_update.cpp create mode 100644 
test/conformance/exp_command_buffer/ndrange_update.cpp create mode 100644 test/conformance/exp_command_buffer/release.cpp create mode 100644 test/conformance/exp_command_buffer/retain.cpp create mode 100644 test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp create mode 100644 test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp diff --git a/include/ur_api.h b/include/ur_api.h index 42012ce3b5..8579ff0326 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -196,6 +196,7 @@ typedef enum ur_function_t { UR_FUNCTION_ADAPTER_RETAIN = 179, ///< Enumerator for ::urAdapterRetain UR_FUNCTION_ADAPTER_GET_LAST_ERROR = 180, ///< Enumerator for ::urAdapterGetLastError UR_FUNCTION_ADAPTER_GET_INFO = 181, ///< Enumerator for ::urAdapterGetInfo + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 182, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp UR_FUNCTION_PROGRAM_BUILD_EXP = 197, ///< Enumerator for ::urProgramBuildExp UR_FUNCTION_PROGRAM_COMPILE_EXP = 198, ///< Enumerator for ::urProgramCompileExp UR_FUNCTION_PROGRAM_LINK_EXP = 199, ///< Enumerator for ::urProgramLinkExp @@ -215,6 +216,10 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213, ///< Enumerator for ::urCommandBufferAppendUSMAdviseExp UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214, ///< Enumerator for ::urEnqueueCooperativeKernelLaunchExp UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215, ///< Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp + UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP = 216, ///< Enumerator for ::urCommandBufferRetainCommandExp + UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP = 217, ///< Enumerator for ::urCommandBufferReleaseCommandExp + UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 218, ///< Enumerator for ::urCommandBufferGetInfoExp + UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 219, ///< Enumerator for ::urCommandBufferCommandGetInfoExp /// @cond UR_FUNCTION_FORCE_UINT32 = 
0x7fffffff /// @endcond @@ -224,48 +229,53 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t - UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - 
UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + 
UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< 
::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC = 0x1001, ///< ::ur_exp_command_buffer_update_kernel_launch_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC = 0x1002, ///< ::ur_exp_command_buffer_update_memobj_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC = 0x1003, ///< ::ur_exp_command_buffer_update_pointer_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC = 0x1004, ///< ::ur_exp_command_buffer_update_value_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC = 0x1005, ///< ::ur_exp_command_buffer_update_exec_info_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -483,6 +493,7 @@ typedef enum ur_result_t { UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP = 
0x1000, ///< Invalid Command-Buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001, ///< Sync point is not valid for the command-buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002, ///< Sync point wait list is invalid + UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP = 0x1003, ///< Handle to command-buffer command is invalid UR_RESULT_ERROR_UNKNOWN = 0x7ffffffe, ///< Unknown or internal error /// @cond UR_RESULT_FORCE_UINT32 = 0x7fffffff @@ -1534,6 +1545,10 @@ typedef enum ur_device_info_t { ///< this composite device. UR_DEVICE_INFO_COMPOSITE_DEVICE = 117, ///< [::ur_device_handle_t] The composite device containing this component ///< device. + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of + ///< command-buffers. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel + ///< commands in a command-buffer. UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001, ///< [::ur_bool_t] returns true if the device supports the creation of @@ -7758,6 +7773,32 @@ urBindlessImagesSignalExternalSemaphoreExp( #if !defined(__GNUC__) #pragma region command buffer(experimental) #endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Command-buffer query information type +typedef enum ur_exp_command_buffer_info_t { + UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT = 0, ///< [uint32_t] Reference count of the command-buffer object. + ///< The reference count returned should be considered immediately stale. + ///< It is unsuitable for general use in applications. This feature is + ///< provided for identifying memory leaks. 
+ /// @cond + UR_EXP_COMMAND_BUFFER_INFO_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_command_buffer_info_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Command-buffer command query information type +typedef enum ur_exp_command_buffer_command_info_t { + UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT = 0, ///< [uint32_t] Reference count of the command-buffer command object. + ///< The reference count returned should be considered immediately stale. + ///< It is unsuitable for general use in applications. This feature is + ///< provided for identifying memory leaks. + /// @cond + UR_EXP_COMMAND_BUFFER_COMMAND_INFO_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_command_buffer_command_info_t; + /////////////////////////////////////////////////////////////////////////////// #ifndef UR_COMMAND_BUFFER_EXTENSION_STRING_EXP /// @brief The extension string which defines support for command-buffers which @@ -7771,9 +7812,92 @@ typedef struct ur_exp_command_buffer_desc_t { ur_structure_type_t stype; ///< [in] type of this structure, must be ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_bool_t isUpdatable; ///< [in] Commands in a finalized command-buffer can be updated. } ur_exp_command_buffer_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command memobj argument. +typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optional] Pointer to memory object properties.
+ ur_mem_handle_t hNewMemObjArg; ///< [in][optional] Handle of memory object to set at argument index. + +} ur_exp_command_buffer_update_memobj_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command pointer argument. +typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optional] Pointer to USM pointer properties. + const void *pNewPointerArg; ///< [in][optional] USM pointer to memory location holding the argument + ///< value to set at argument index. + +} ur_exp_command_buffer_update_pointer_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command value argument. +typedef struct ur_exp_command_buffer_update_value_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + uint32_t argSize; ///< [in] Argument size. + const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optional] Pointer to value properties. + const void *pNewValueArg; ///< [in][optional] Argument value representing matching kernel arg type to + ///< set at argument index. + +} ur_exp_command_buffer_update_value_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating kernel command execution info.
+typedef struct ur_exp_command_buffer_update_exec_info_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_kernel_exec_info_t propName; ///< [in] Name of execution attribute. + size_t propSize; ///< [in] Size of execution attribute. + const ur_kernel_exec_info_properties_t *pProperties; ///< [in][optional] Pointer to execution info properties. + const void *pNewExecInfo; ///< [in] Pointer to memory location holding the execution info value. + +} ur_exp_command_buffer_update_exec_info_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel launch command. +typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t numNewMemObjArgs; ///< [in] Length of pNewMemObjArgList. + uint32_t numNewPointerArgs; ///< [in] Length of pNewPointerArgList. + uint32_t numNewValueArgs; ///< [in] Length of pNewValueArgList. + uint32_t numNewExecInfos; ///< [in] Length of pNewExecInfoList. + uint32_t newWorkDim; ///< [in] Number of work dimensions in the kernel ND-range, from 1-3. + const ur_exp_command_buffer_update_memobj_arg_desc_t *pNewMemObjArgList; ///< [in][optional][range(0, numNewMemObjArgs)] An array describing the new + ///< kernel mem obj arguments for the command. + const ur_exp_command_buffer_update_pointer_arg_desc_t *pNewPointerArgList; ///< [in][optional][range(0, numNewPointerArgs)] An array describing the + ///< new kernel pointer arguments for the command. 
+ const ur_exp_command_buffer_update_value_arg_desc_t *pNewValueArgList; ///< [in][optional][range(0, numNewValueArgs)] An array describing the new + ///< kernel value arguments for the command. + const ur_exp_command_buffer_update_exec_info_desc_t *pNewExecInfoList; ///< [in][optional][range(0, numNewExecInfos)] An array describing the + ///< execution info objects for the command. + size_t *pNewGlobalWorkOffset; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned + ///< values that describe the offset used to calculate the global ID. + size_t *pNewGlobalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned + ///< values that describe the number of global work-items. + size_t *pNewLocalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned + ///< values that describe the number of work-items that make up a + ///< work-group. If nullptr, the runtime implementation will choose the + ///< work-group size. + +} ur_exp_command_buffer_update_kernel_launch_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief A value that identifies a command inside of a command-buffer, used for /// defining dependencies between commands in the same command-buffer. @@ -7783,11 +7907,15 @@ typedef uint32_t ur_exp_command_buffer_sync_point_t; /// @brief Handle of Command-Buffer object typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of a Command-Buffer command +typedef struct ur_exp_command_buffer_command_handle_t_ *ur_exp_command_buffer_command_handle_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7805,10 +7933,10 @@ typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor - ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] pointer to Command-Buffer handle + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. + ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ); /////////////////////////////////////////////////////////////////////////////// @@ -7826,7 +7954,7 @@ urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// @@ -7845,7 +7973,7 @@ urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. 
); /////////////////////////////////////////////////////////////////////////////// @@ -7864,11 +7992,11 @@ urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7895,19 +8023,20 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7932,17 +8061,17 @@ urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7981,7 +8110,7 @@ urCommandBufferAppendUSMFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8002,7 +8131,7 @@ urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -8010,11 +8139,11 @@ urCommandBufferAppendMemBufferCopyExp( size_t size, ///< [in] The number of bytes to be copied. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8036,18 +8165,18 @@ urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - const void *pSrc, ///< [in] pointer to host memory where data is to be written from. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + const void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8069,18 +8198,18 @@ urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. 
- ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being read. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8101,7 +8230,7 @@ urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t srcOrigin, ///< [in] Origin for the region of data to be copied from the source. 
@@ -8113,11 +8242,11 @@ urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, ///< [in] Slice pitch of the destination memory. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8139,26 +8268,26 @@ urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. 
+ size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. - void *pSrc, ///< [in] pointer to host memory where data is to be written from. + void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8180,25 +8309,25 @@ urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. 
ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. + size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8234,7 +8363,7 @@ urCommandBufferAppendMemBufferFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -8275,7 +8404,7 @@ urCommandBufferAppendUSMPrefetchExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -8337,17 +8466,152 @@ urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_queue_handle_t hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_queue_handle_t hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Increment the command object's reference count. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t hCommand ///< [in] Handle of the command-buffer command. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Decrement the command object's reference count and delete the command +/// object if the reference count becomes zero. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t hCommand ///< [in] Handle of the command-buffer command. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command in a finalized command-buffer. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer object information. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t propName, ///< [in] the name of the command-buffer property to query + size_t propSize, ///< [in] size in bytes of the command-buffer property value + void *pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t *pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer command object information. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t propName, ///< [in] the name of the command-buffer command property to query + size_t propSize, ///< [in] size in bytes of the command-buffer command property value + void *pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t *pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -10507,6 +10771,7 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_kernel_launch_exp_params_t; 
/////////////////////////////////////////////////////////////////////////////// @@ -10700,6 +10965,55 @@ typedef struct ur_command_buffer_enqueue_exp_params_t { ur_event_handle_t **pphEvent; } ur_command_buffer_enqueue_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferRetainCommandExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_retain_command_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; +} ur_command_buffer_retain_command_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferReleaseCommandExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_release_command_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; +} ur_command_buffer_release_command_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferUpdateKernelLaunchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_update_kernel_launch_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; + const ur_exp_command_buffer_update_kernel_launch_desc_t **ppUpdateKernelLaunch; +} ur_command_buffer_update_kernel_launch_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferGetInfoExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability 
to modify the parameter's value +typedef struct ur_command_buffer_get_info_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + ur_exp_command_buffer_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_command_buffer_get_info_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferCommandGetInfoExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_command_get_info_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; + ur_exp_command_buffer_command_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_command_buffer_command_get_info_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 6e1bf577f8..891d8bc7f4 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1854,7 +1854,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp @@ -2011,6 +2012,40 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferEnqueueExp_t)( const ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferRetainCommandExp +typedef 
ur_result_t(UR_APICALL *ur_pfnCommandBufferRetainCommandExp_t)( + ur_exp_command_buffer_command_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferReleaseCommandExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferReleaseCommandExp_t)( + ur_exp_command_buffer_command_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferUpdateKernelLaunchExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferUpdateKernelLaunchExp_t)( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferGetInfoExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferGetInfoExp_t)( + ur_exp_command_buffer_handle_t, + ur_exp_command_buffer_info_t, + size_t, + void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferCommandGetInfoExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferCommandGetInfoExp_t)( + ur_exp_command_buffer_command_handle_t, + ur_exp_command_buffer_command_info_t, + size_t, + void *, + size_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of CommandBufferExp functions pointers typedef struct ur_command_buffer_exp_dditable_t { @@ -2031,6 +2066,11 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferAppendUSMPrefetchExp_t pfnAppendUSMPrefetchExp; ur_pfnCommandBufferAppendUSMAdviseExp_t pfnAppendUSMAdviseExp; ur_pfnCommandBufferEnqueueExp_t pfnEnqueueExp; + ur_pfnCommandBufferRetainCommandExp_t pfnRetainCommandExp; + ur_pfnCommandBufferReleaseCommandExp_t pfnReleaseCommandExp; + ur_pfnCommandBufferUpdateKernelLaunchExp_t pfnUpdateKernelLaunchExp; + 
ur_pfnCommandBufferGetInfoExp_t pfnGetInfoExp; + ur_pfnCommandBufferCommandGetInfoExp_t pfnCommandGetInfoExp; } ur_command_buffer_exp_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/ur_print.h b/include/ur_print.h index c847341893..e1718e99f8 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -930,6 +930,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpInteropMemDesc(const struct ur_exp /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpInteropSemaphoreDesc(const struct ur_exp_interop_semaphore_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_info_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferInfo(enum ur_exp_command_buffer_info_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_command_info_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferCommandInfo(enum ur_exp_command_buffer_command_info_t value, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_command_buffer_desc_t struct /// @returns @@ -938,6 +954,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpInteropSemaphoreDesc(const struct /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferDesc(const struct ur_exp_command_buffer_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_update_memobj_arg_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateMemobjArgDesc(const struct ur_exp_command_buffer_update_memobj_arg_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_update_pointer_arg_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdatePointerArgDesc(const struct ur_exp_command_buffer_update_pointer_arg_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_update_value_arg_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateValueArgDesc(const struct ur_exp_command_buffer_update_value_arg_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_update_exec_info_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateExecInfoDesc(const struct ur_exp_command_buffer_update_exec_info_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print 
ur_exp_command_buffer_update_kernel_launch_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateKernelLaunchDesc(const struct ur_exp_command_buffer_update_kernel_launch_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_peer_info_t enum /// @returns @@ -2250,6 +2306,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferAppendUsmAdviseExpParams /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferEnqueueExpParams(const struct ur_command_buffer_enqueue_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_retain_command_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferRetainCommandExpParams(const struct ur_command_buffer_retain_command_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_release_command_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferReleaseCommandExpParams(const struct ur_command_buffer_release_command_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_update_kernel_launch_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - 
::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferUpdateKernelLaunchExpParams(const struct ur_command_buffer_update_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_get_info_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferGetInfoExpParams(const struct ur_command_buffer_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_command_get_info_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferCommandGetInfoExpParams(const struct ur_command_buffer_command_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_usm_p2p_enable_peer_access_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index cd147bc10e..b4c777b77d 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -58,6 +58,8 @@ template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; template @@ -199,6 +201,12 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printTagged(std::ostream &os, const void *ptr, 
ur_exp_command_buffer_info_t value, size_t size); + +template <> +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_command_info_t value, size_t size); + template <> inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_info_t value, size_t size); @@ -318,7 +326,14 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_addr_modes_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); +inline std::ostream &operator<<(std::ostream &os, ur_exp_command_buffer_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_exp_command_buffer_command_info_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_memobj_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_pointer_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_value_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_exec_info_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value); /////////////////////////////////////////////////////////////////////////////// @@ -822,6 +837,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t 
value) { case UR_FUNCTION_ADAPTER_GET_INFO: os << "UR_FUNCTION_ADAPTER_GET_INFO"; break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP"; + break; case UR_FUNCTION_PROGRAM_BUILD_EXP: os << "UR_FUNCTION_PROGRAM_BUILD_EXP"; break; @@ -879,6 +897,18 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t value) { case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP: os << "UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP"; break; + case UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP"; + break; default: os << "unknown enumerator"; break; @@ -999,6 +1029,21 @@ inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC"; + 
break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES"; break; @@ -1214,6 +1259,31 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: { + const ur_exp_command_buffer_update_kernel_launch_desc_t *pstruct = (const ur_exp_command_buffer_update_kernel_launch_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: { + const ur_exp_command_buffer_update_memobj_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_memobj_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: { + const ur_exp_command_buffer_update_pointer_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_pointer_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: { + const ur_exp_command_buffer_update_value_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_value_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: { + const ur_exp_command_buffer_update_exec_info_desc_t *pstruct = (const ur_exp_command_buffer_update_exec_info_desc_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: { const ur_exp_sampler_mip_properties_t *pstruct = (const ur_exp_sampler_mip_properties_t *)ptr; printPtr(os, pstruct); @@ -1472,6 +1542,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_result_t value) { case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP"; break; + case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP: + os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP"; + break; case 
UR_RESULT_ERROR_UNKNOWN: os << "UR_RESULT_ERROR_UNKNOWN"; break; @@ -2407,6 +2480,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_device_info_t value) { case UR_DEVICE_INFO_COMPOSITE_DEVICE: os << "UR_DEVICE_INFO_COMPOSITE_DEVICE"; break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP"; break; @@ -3843,6 +3922,30 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -9180,6 +9283,96 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_interop_se os << "}"; return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_exp_command_buffer_info_t value) { + switch (value) { + case 
UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + os << "UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_info_t enum value +template <> +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size) { + if (ptr == NULL) { + return printPtr(os, ptr); + } + + switch (value) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: { + const uint32_t *tptr = (const uint32_t *)ptr; + if (sizeof(uint32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + default: + os << "unknown enumerator"; + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_command_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_exp_command_buffer_command_info_t value) { + switch (value) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: + os << "UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_command_info_t enum value +template <> +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_command_info_t value, size_t size) { + if (ptr == NULL) { + return printPtr(os, ptr); + } + + switch (value) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: { + 
const uint32_t *tptr = (const uint32_t *)ptr; + if (sizeof(uint32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + default: + os << "unknown enumerator"; + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_command_buffer_desc_t type /// @returns @@ -9197,6 +9390,284 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu ur::details::printStruct(os, (params.pNext)); + os << ", "; + os << ".isUpdatable = "; + + os << (params.isUpdatable); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_memobj_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_memobj_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_memobj_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".hNewMemObjArg = "; + + ur::details::printPtr(os, + (params.hNewMemObjArg)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_pointer_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_pointer_arg_desc_t params) { + os << 
"(struct ur_exp_command_buffer_update_pointer_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pNewPointerArg = "; + + os << (params.pNewPointerArg); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_value_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_value_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_value_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".argSize = "; + + os << (params.argSize); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pNewValueArg = "; + + os << (params.pNewValueArg); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_exec_info_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_exec_info_desc_t params) { + os << "(struct ur_exp_command_buffer_update_exec_info_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".propName = "; + + os << (params.propName); + + os << ", "; + os << ".propSize = "; + + os << (params.propSize); + + os << ", "; + os << 
".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pNewExecInfo = "; + + os << (params.pNewExecInfo); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_kernel_launch_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_kernel_launch_desc_t params) { + os << "(struct ur_exp_command_buffer_update_kernel_launch_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".numNewMemObjArgs = "; + + os << (params.numNewMemObjArgs); + + os << ", "; + os << ".numNewPointerArgs = "; + + os << (params.numNewPointerArgs); + + os << ", "; + os << ".numNewValueArgs = "; + + os << (params.numNewValueArgs); + + os << ", "; + os << ".numNewExecInfos = "; + + os << (params.numNewExecInfos); + + os << ", "; + os << ".newWorkDim = "; + + os << (params.newWorkDim); + + os << ", "; + os << ".pNewMemObjArgList = {"; + for (size_t i = 0; (params.pNewMemObjArgList) != NULL && i < params.numNewMemObjArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewMemObjArgList))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewPointerArgList = {"; + for (size_t i = 0; (params.pNewPointerArgList) != NULL && i < params.numNewPointerArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewPointerArgList))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewValueArgList = {"; + for (size_t i = 0; (params.pNewValueArgList) != NULL && i < params.numNewValueArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewValueArgList))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewExecInfoList = {"; + for (size_t i = 0; (params.pNewExecInfoList) != NULL && i < params.numNewExecInfos; ++i) { + if (i != 0) { + os << ", "; + 
} + + os << ((params.pNewExecInfoList))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewGlobalWorkOffset = {"; + for (size_t i = 0; (params.pNewGlobalWorkOffset) != NULL && i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewGlobalWorkOffset))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewGlobalWorkSize = {"; + for (size_t i = 0; (params.pNewGlobalWorkSize) != NULL && i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewGlobalWorkSize))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewLocalWorkSize = {"; + for (size_t i = 0; (params.pNewLocalWorkSize) != NULL && i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewLocalWorkSize))[i]; + } + os << "}"; + os << "}"; return os; } @@ -14600,6 +15071,12 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -15299,6 +15776,122 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_retain_command_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_retain_command_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_release_command_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_release_command_exp_params_t *params) { + + os << ".hCommand = "; + + 
ur::details::printPtr(os, + *(params->phCommand)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_update_kernel_launch_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_update_kernel_launch_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".pUpdateKernelLaunch = "; + + ur::details::printPtr(os, + *(params->ppUpdateKernelLaunch)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_get_info_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_get_info_exp_params_t *params) { + + os << ".hCommandBuffer = "; + + ur::details::printPtr(os, + *(params->phCommandBuffer)); + + os << ", "; + os << ".propName = "; + + os << *(params->ppropName); + + os << ", "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_command_get_info_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_command_get_info_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".propName = "; + + os << *(params->ppropName); + + os << ", "; + os << ".propSize 
= "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t type /// @returns @@ -16433,6 +17026,21 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { os << (const struct ur_command_buffer_enqueue_exp_params_t *)params; } break; + case UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP: { + os << (const struct ur_command_buffer_retain_command_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP: { + os << (const struct ur_command_buffer_release_command_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: { + os << (const struct ur_command_buffer_update_kernel_launch_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP: { + os << (const struct ur_command_buffer_get_info_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP: { + os << (const struct ur_command_buffer_command_get_info_exp_params_t *)params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..0143b72c77 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -57,24 +57,29 @@ returned list of supported extensions. 
${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, 0, nullptr, &returnedSize); - // Retrieve extension string + // Retrieve extension string std::unique_ptr returnedExtensions(new char[returnedSize]); - ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, + ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, returnedExtensions.get(), nullptr); - + std::string_view ExtensionsString(returnedExtensions.get()); - bool CmdBufferSupport = + bool CmdBufferSupport = ExtensionsString.find(${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP) != std::string::npos; +.. note:: + The ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP device info query exists to + serve the same purpose as ${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP. + Command-Buffer Creation -------------------------------------------------------------------------------- Command-Buffers are tied to a specific ${x}_context_handle_t and ${x}_device_handle_t. ${x}CommandBufferCreateExp optionally takes a descriptor to provide additional properties for how the command-buffer should be -constructed. There are currently no unique members defined for -${x}_exp_command_buffer_desc_t, however they may be added in the future. +constructed. The only unique member defined in ${x}_exp_command_buffer_desc_t +is ``isUpdatable``, which should be set to ``true`` to support :ref:`updating +command-buffer commands`. Command-buffers are reference counted and can be retained and released by calling ${x}CommandBufferRetainExp and ${x}CommandBufferReleaseExp respectively. @@ -89,6 +94,11 @@ However, they differ in that they take a command-buffer handle instead of a queue handle, and the dependencies and return parameters are sync-points instead of event handles. +The entry-point for appending a kernel launch command also returns an optional +handle to the command being appended. 
This handle can be used to update the +command configuration between command-buffer executions, see the section on +:ref:`updating command-buffer commands`. + Currently only the following commands are supported: * ${x}CommandBufferAppendKernelLaunchExp @@ -103,9 +113,9 @@ Currently only the following commands are supported: * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp - + It is planned to eventually support any command type from the Core API which can -actually be appended to the equiavalent adapter native constructs. +actually be appended to the equivalent adapter native constructs. Sync-Points -------------------------------------------------------------------------------- @@ -122,15 +132,15 @@ were obtained from. // Append a memcpy with no sync-point dependencies ${x}_exp_command_buffer_sync_point_t syncPoint; - ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, + ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, nullptr, &syncPoint); - + // Append a kernel launch with syncPoint as a dependency, ignore returned // sync-point - ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 1, &syncPoint, - nullptr); + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 1, &syncPoint, + nullptr, nullptr); Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -147,6 +157,82 @@ enqueued or executed simultaneously, and submissions may be serialized. 
${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, &executionEvent); +Updating Command-Buffer Commands +-------------------------------------------------------------------------------- + +An adapter implementing the command-buffer experimental feature can optionally +support updating the configuration of kernel commands recorded to a +command-buffer. Support for this is reported by returning true in the +${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. + +Updating kernel commands is done by passing the new kernel configuration +to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of +the kernel command to update. Configurations that can be changed are the +parameters to the kernel and the execution ND-Range. + +.. parsed-literal:: + + // Create a command-buffer with update enabled. + ${x}_exp_command_buffer_desc_t desc { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, + nullptr, + true // isUpdatable + }; + ${x}_exp_command_buffer_handle_t hCommandBuffer; + ${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer); + + // Append a kernel command which has two buffer parameters, an input + // and an output. 
+ ${x}_exp_command_buffer_command_handle_t hCommand; + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 0, nullptr, + nullptr, &hCommand); + + // Close the command-buffer before updating + ${x}CommandBufferFinalizeExp(hCommandBuffer); + + // Define kernel argument at index 0 to be a new input buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newInputArg { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + newInputBuffer, // hNewMemObjArg + }; + + // Define kernel argument at index 1 to be a new output buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newOutputArg { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + nullptr, // pProperties + newOutputBuffer, // hNewMemObjArg + }; + + // Define the new configuration of the kernel command + ${x}_exp_command_buffer_update_memobj_arg_desc_t updatedArgs[2] = {newInputArg, newOutputArg}; + ${x}_exp_command_buffer_update_kernel_launch_desc_t update { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + updatedArgs, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Perform the update + ${x}CommandBufferUpdateKernelLaunchExp(hCommand, &update); + + API -------------------------------------------------------------------------------- @@ -156,12 +242,21 @@ Macros Enums ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}_device_info_t + * ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP + * 
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP * ${x}_result_t * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP + * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP * ${x}_structure_type_t * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC * ${x}_command_t * ${X}_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP * ${x}_function_t @@ -182,15 +277,23 @@ Enums * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP - - + * ${X}_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP +* ${x}_exp_command_buffer_info_t + * ${X}_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT +* ${x}_exp_command_buffer_command_info_t + * ${X}_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT Types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_exp_command_buffer_desc_t +* ${x}_exp_command_buffer_update_kernel_launch_desc_t +* ${x}_exp_command_buffer_update_memobj_arg_desc_t +* ${x}_exp_command_buffer_update_pointer_arg_desc_t +* ${x}_exp_command_buffer_update_value_arg_desc_t +* ${x}_exp_command_buffer_update_exec_info_desc_t * ${x}_exp_command_buffer_sync_point_t * ${x}_exp_command_buffer_handle_t - +* ${x}_exp_command_buffer_command_handle_t Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -211,6 +314,11 @@ Functions * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp +* ${x}CommandBufferRetainCommandExp +* 
${x}CommandBufferReleaseCommandExp +* ${x}CommandBufferUpdateKernelLaunchExp +* ${x}CommandBufferGetInfoExp +* ${x}CommandBufferCommandGetInfoExp Changelog -------------------------------------------------------------------------------- @@ -227,6 +335,8 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ +| 1.4 | Add function definitions for kernel command update | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- @@ -234,3 +344,4 @@ Contributors * Ben Tracy `ben.tracy@codeplay.com `_ * Ewan Crawford `ewan@codeplay.com `_ * Maxime France-Pillois `maxime.francepillois@codeplay.com `_ +* Aaron Greig `aaron.greig@codeplay.com `_ diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 7d1b686aab..d2292ceb22 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -14,6 +14,19 @@ ordinal: "99" --- #-------------------------------------------------------------------------- type: enum extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support command-buffers." +name: $x_device_info_t +etors: + - name: COMMAND_BUFFER_SUPPORT_EXP + value: "0x1000" + desc: "[$x_bool_t] Returns true if the device supports the use of command-buffers." + - name: COMMAND_BUFFER_UPDATE_SUPPORT_EXP + value: "0x1001" + desc: "[$x_bool_t] Returns true if the device supports updating the kernel commands in a command-buffer." +--- #-------------------------------------------------------------------------- +type: enum +extend: true desc: "Experimental Command Buffer result type enums." 
name: $x_result_t etors: @@ -26,6 +39,9 @@ etors: - name: ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP value: "0x1002" desc: "Sync point wait list is invalid" + - name: ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + value: "0x1003" + desc: "Handle to command-buffer command is invalid" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -35,6 +51,21 @@ etors: - name: EXP_COMMAND_BUFFER_DESC desc: $x_exp_command_buffer_desc_t value: "0x1000" + - name: EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + desc: $x_exp_command_buffer_update_kernel_launch_desc_t + value: "0x1001" + - name: EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + desc: $x_exp_command_buffer_update_memobj_arg_desc_t + value: "0x1002" + - name: EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + desc: $x_exp_command_buffer_update_pointer_arg_desc_t + value: "0x1003" + - name: EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + desc: $x_exp_command_buffer_update_value_arg_desc_t + value: "0x1004" + - name: EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + desc: $x_exp_command_buffer_update_exec_info_desc_t + value: "0x1005" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -45,6 +76,30 @@ etors: desc: Event created by $xCommandBufferEnqueueExp value: "0x1000" --- #-------------------------------------------------------------------------- +type: enum +desc: "Command-buffer query information type" +class: $xCommandBuffer +name: $x_exp_command_buffer_info_t +typed_etors: True +etors: + - name: REFERENCE_COUNT + desc: | + [uint32_t] Reference count of the command-buffer object. + The reference count returned should be considered immediately stale. + It is unsuitable for general use in applications. This feature is provided for identifying memory leaks. 
+--- #-------------------------------------------------------------------------- +type: enum +desc: "Command-buffer command query information type" +class: $xCommandBuffer +name: $x_exp_command_buffer_command_info_t +typed_etors: True +etors: + - name: REFERENCE_COUNT + desc: | + [uint32_t] Reference count of the command-buffer command object. + The reference count returned should be considered immediately stale. + It is unsuitable for general use in applications. This feature is provided for identifying memory leaks. +--- #-------------------------------------------------------------------------- type: macro desc: "The extension string which defines support for command-buffers which is returned when querying device extensions." name: $X_COMMAND_BUFFER_EXTENSION_STRING_EXP @@ -54,7 +109,118 @@ type: struct desc: "Command-Buffer Descriptor Type" name: $x_exp_command_buffer_desc_t base: $x_base_desc_t -members: [] +members: + - type: $x_bool_t + name: isUpdatable + desc: "[in] Commands in a finalized command-buffer can be updated." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command memobj argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_memobj_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: "const ur_kernel_arg_mem_obj_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to memory object properties." + - type: $x_mem_handle_t + name: hNewMemObjArg + desc: "[in][optional] Handle of memory object to set at argument index." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command pointer argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_pointer_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index."
+ - type: "const ur_kernel_arg_pointer_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to USM pointer properties." + - type: "const void *" + name: pNewPointerArg + desc: "[in][optional] USM pointer to memory location holding the argument value to set at argument index." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command value argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_value_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: uint32_t + name: argSize + desc: "[in] Argument size." + - type: "const ur_kernel_arg_value_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to value properties." + - type: "const void *" + name: pNewValueArg + desc: "[in][optional] Argument value representing matching kernel arg type to set at argument index." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating kernel command execution info." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_exec_info_desc_t +members: + - type: ur_kernel_exec_info_t + name: propName + desc: "[in] Name of execution attribute." + - type: size_t + name: propSize + desc: "[in] Size of execution attribute." + - type: "const ur_kernel_exec_info_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to execution info properties." + - type: "const void *" + name: pNewExecInfo + desc: "[in] Pointer to memory location holding the execution info value." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel launch command." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_kernel_launch_desc_t +members: + - type: uint32_t + name: numNewMemObjArgs + desc: "[in] Length of pNewMemObjArgList."
+ - type: uint32_t + name: numNewPointerArgs + desc: "[in] Length of pNewPointerArgList." + - type: uint32_t + name: numNewValueArgs + desc: "[in] Length of pNewValueArgList." + - type: uint32_t + name: numNewExecInfos + desc: "[in] Length of pNewExecInfoList." + - type: uint32_t + name: newWorkDim + desc: "[in] Number of work dimensions in the kernel ND-range, from 1-3." + - type: "const $x_exp_command_buffer_update_memobj_arg_desc_t*" + name: pNewMemObjArgList + desc: "[in][optional][range(0, numNewMemObjArgs)] An array describing the new kernel mem obj arguments for the command." + - type: "const $x_exp_command_buffer_update_pointer_arg_desc_t*" + name: pNewPointerArgList + desc: "[in][optional][range(0, numNewPointerArgs)] An array describing the new kernel pointer arguments for the command." + - type: "const $x_exp_command_buffer_update_value_arg_desc_t*" + name: pNewValueArgList + desc: "[in][optional][range(0, numNewValueArgs)] An array describing the new kernel value arguments for the command." + - type: "const $x_exp_command_buffer_update_exec_info_desc_t*" + name: pNewExecInfoList + desc: "[in][optional][range(0, numNewExecInfos)] An array describing the execution info objects for the command." + - type: "size_t*" + name: pNewGlobalWorkOffset + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the offset used to calculate the global ID." + - type: "size_t*" + name: pNewGlobalWorkSize + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of global work-items." + - type: "size_t*" + name: pNewLocalWorkSize + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If nullptr, the runtime implementation will choose the work-group size." 
--- #-------------------------------------------------------------------------- type: typedef desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer." @@ -67,26 +233,31 @@ desc: "Handle of Command-Buffer object" class: $xCommandBuffer name: "$x_exp_command_buffer_handle_t" --- #-------------------------------------------------------------------------- +type: handle +desc: "Handle of a Command-Buffer command" +class: $xCommandBuffer +name: "$x_exp_command_buffer_command_handle_t" +--- #-------------------------------------------------------------------------- type: function desc: "Create a Command-Buffer object" class: $xCommandBuffer name: CreateExp decl: static details: - - "Create a command-buffer object" + - "Create a command-buffer object." params: - type: $x_context_handle_t name: hContext - desc: "[in] handle of the context object" + desc: "[in] Handle of the context object." - type: $x_device_handle_t name: hDevice - desc: "[in] handle of the device object" + desc: "[in] Handle of the device object." - type: "const $x_exp_command_buffer_desc_t*" name: pCommandBufferDesc - desc: "[in][optional] CommandBuffer descriptor" + desc: "[in][optional] command-buffer descriptor." - type: "$x_exp_command_buffer_handle_t*" name: phCommandBuffer - desc: "[out] pointer to Command-Buffer handle" + desc: "[out] Pointer to command-Buffer handle." returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE @@ -100,7 +271,7 @@ name: RetainExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -113,7 +284,7 @@ name: ReleaseExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -126,26 +297,26 @@ name: FinalizeExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a kernel execution command to a command-buffer object" +desc: "Append a kernel execution command to a command-buffer object." class: $xCommandBuffer name: AppendKernelLaunchExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." - type: $x_kernel_handle_t name: hKernel - desc: "[in] kernel to append" + desc: "[in] Kernel to append." - type: uint32_t name: workDim - desc: "[in] dimension of the kernel execution" + desc: "[in] Dimension of the kernel execution." - type: "const size_t*" name: pGlobalWorkOffset desc: "[in] Offset to use when executing kernel." @@ -163,7 +334,10 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_KERNEL @@ -178,13 +352,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM memcpy command to a command-buffer object" +desc: "Append a USM memcpy command to a command-buffer object." class: $xCommandBuffer name: AppendUSMMemcpyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: "void*" name: pDst desc: "[in] Location the data will be copied to." @@ -193,7 +367,7 @@ params: desc: "[in] The data to be copied." - type: "size_t" name: size - desc: "[in] The number of bytes to copy" + desc: "[in] The number of bytes to copy." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -202,7 +376,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_SIZE: @@ -217,7 +391,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM fill command to a command-buffer object" +desc: "Append a USM fill command to a command-buffer object." class: $xCommandBuffer name: AppendUSMFillExp params: @@ -262,13 +436,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory copy command to a command-buffer object" +desc: "Append a memory copy command to a command-buffer object." 
class: $xCommandBuffer name: AppendMemBufferCopyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -292,7 +466,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -304,25 +478,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory write command to a command-buffer object" +desc: "Append a memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "const void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." 
@@ -331,7 +505,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -343,25 +517,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory read command to a command-buffer object" +desc: "Append a memory read command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferReadExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be written to." + desc: "[in] Pointer to host memory where data is to be written to." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -370,7 +544,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -382,13 +556,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory copy command to a command-buffer object" +desc: "Append a rectangular memory copy command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferCopyRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -424,7 +598,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -436,16 +610,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory write command to a command-buffer object" +desc: "Append a rectangular memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -457,19 +631,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." 
- type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being written." + desc: "[in] Length of each 2D slice in bytes in the buffer object being written." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pSrc." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pSrc." - type: "void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -478,7 +652,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -490,16 +664,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory read command to a command-buffer object" +desc: "Append a rectangular memory read command to a command-buffer object." 
class: $xCommandBuffer name: AppendMemBufferReadRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -511,19 +685,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." - type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being read." + desc: "[in] Length of each 2D slice in bytes in the buffer object being read." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pDst." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pDst." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be read into." + desc: "[in] Pointer to host memory where data is to be read into." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -532,7 +706,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -544,7 +718,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory fill command to a command-buffer object" +desc: "Append a memory fill command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferFillExp params: @@ -588,12 +762,12 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Prefetch command to a command-buffer object" +desc: "Append a USM Prefetch command to a command-buffer object." class: $xCommandBuffer name: AppendUSMPrefetchExp details: - - "Prefetching may not be supported for all devices or allocation types. If memory prefetching - is not supported, the prefetch hint will be ignored." + - "Prefetching may not be supported for all devices or allocation types. If + memory prefetching is not supported, the prefetch hint will be ignored." params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -630,12 +804,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Advise command to a command-buffer object" +desc: "Append a USM Advise command to a command-buffer object." class: $xCommandBuffer name: AppendUSMAdviseExp details: - - "Not all memory advice hints may be supported for all devices or allocation types. - If a memory advice hint is not supported, it will be ignored." + - "Not all memory advice hints may be supported for all devices or + allocation types. If a memory advice hint is not supported, it will be + ignored." 
params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -678,18 +853,18 @@ name: EnqueueExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_queue_handle_t name: hQueue - desc: "[in] the queue to submit this command-buffer for execution." + desc: "[in] The queue to submit this command-buffer for execution." - type: uint32_t name: numEventsInWaitList - desc: "[in] size of the event wait list" + desc: "[in] Size of the event wait list." - type: "const $x_event_handle_t*" name: phEventWaitList desc: | [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command-buffer execution. - If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. - type: $x_event_handle_t* name: phEvent desc: | @@ -704,3 +879,124 @@ returns: - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Increment the command object's reference count." +class: $xCommandBuffer +name: RetainCommandExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer command." +returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Decrement the command object's reference count and delete the command object if the reference count becomes zero." 
+class: $xCommandBuffer +name: ReleaseCommandExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer command." +returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Update a kernel launch command in a finalized command-buffer." +class: $xCommandBuffer +name: UpdateKernelLaunchExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer kernel command to update." + - type: "const $x_exp_command_buffer_update_kernel_launch_desc_t*" + name: pUpdateKernelLaunch + desc: "[in] Struct defining how the kernel command is to be updated." + +returns: + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If update functionality is not supported by the device." + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." + - "If the command-buffer `hCommand` belongs to has not been finalized." + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE + - $X_RESULT_ERROR_INVALID_ENUMERATION + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Get command-buffer object information." 
+class: $xCommandBuffer +name: GetInfoExp +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object" + - type: $x_exp_command_buffer_info_t + name: propName + desc: "[in] the name of the command-buffer property to query" + - type: size_t + name: propSize + desc: "[in] size in bytes of the command-buffer property value" + - type: void* + name: pPropValue + desc: "[out][optional][typename(propName, propSize)] value of the command-buffer property" + - type: size_t* + name: pPropSizeRet + desc: "[out][optional] bytes returned in command-buffer property" +returns: + - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + - "If `propName` is not supported by the adapter." + - $X_RESULT_ERROR_INVALID_SIZE: + - "`propSize == 0 && pPropValue != NULL`" + - "If `propSize` is less than the real number of bytes needed to return the info." + - $X_RESULT_ERROR_INVALID_NULL_POINTER: + - "`propSize != 0 && pPropValue == NULL`" + - "`pPropValue == NULL && pPropSizeRet == NULL`" + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Get command-buffer command object information."
+class: $xCommandBuffer +name: CommandGetInfoExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] handle of the command-buffer command object" + - type: $x_exp_command_buffer_command_info_t + name: propName + desc: "[in] the name of the command-buffer command property to query" + - type: size_t + name: propSize + desc: "[in] size in bytes of the command-buffer command property value" + - type: void* + name: pPropValue + desc: "[out][optional][typename(propName, propSize)] value of the command-buffer command property" + - type: size_t* + name: pPropSizeRet + desc: "[out][optional] bytes returned in command-buffer command property" +returns: + - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + - "If `propName` is not supported by the adapter." + - $X_RESULT_ERROR_INVALID_SIZE: + - "`propSize == 0 && pPropValue != NULL`" + - "If `propSize` is less than the real number of bytes needed to return the info." + - $X_RESULT_ERROR_INVALID_NULL_POINTER: + - "`propSize != 0 && pPropValue == NULL`" + - "`pPropValue == NULL && pPropSizeRet == NULL`" + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 6195cd4980..363531580f 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -502,6 +502,9 @@ etors: - name: ADAPTER_GET_INFO desc: Enumerator for $xAdapterGetInfo value: '181' +- name: COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP + desc: Enumerator for $xCommandBufferUpdateKernelLaunchExp + value: '182' - name: PROGRAM_BUILD_EXP desc: Enumerator for $xProgramBuildExp value: '197' @@ -559,6 +562,18 @@ etors: - name: KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP desc: Enumerator for $xKernelSuggestMaxCooperativeGroupCountExp value: '215' +- name: COMMAND_BUFFER_RETAIN_COMMAND_EXP + desc: Enumerator for $xCommandBufferRetainCommandExp + value: '216' +- name: 
COMMAND_BUFFER_RELEASE_COMMAND_EXP + desc: Enumerator for $xCommandBufferReleaseCommandExp + value: '217' +- name: COMMAND_BUFFER_GET_INFO_EXP + desc: Enumerator for $xCommandBufferGetInfoExp + value: '218' +- name: COMMAND_BUFFER_COMMAND_GET_INFO_EXP + desc: Enumerator for $xCommandBufferCommandGetInfoExp + value: '219' --- type: enum desc: Defines structure types diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index a65530a1f1..3f7970df53 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -19,12 +19,38 @@ #include +namespace { +ur_result_t +commandBufferReleaseInternal(ur_exp_command_buffer_handle_t CommandBuffer) { + if (CommandBuffer->decrementInternalReferenceCount() != 0) { + return UR_RESULT_SUCCESS; + } + + delete CommandBuffer; + return UR_RESULT_SUCCESS; +} + +ur_result_t +commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { + if (Command->decrementInternalReferenceCount() != 0) { + return UR_RESULT_SUCCESS; + } + + // Decrement parent command-buffer internal ref count + commandBufferReleaseInternal(Command->CommandBuffer); + + delete Command; + return UR_RESULT_SUCCESS; +} +} // end anonymous namespace + ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( - ur_context_handle_t hContext, ur_device_handle_t hDevice) - : Context(hContext), Device(hDevice), CudaGraph{nullptr}, - CudaGraphExec{nullptr}, RefCount{1}, NextSyncPoint{0} { - urContextRetain(hContext); - urDeviceRetain(hDevice); + ur_context_handle_t Context, ur_device_handle_t Device, bool IsUpdatable) + : Context(Context), Device(Device), + IsUpdatable(IsUpdatable), CudaGraph{nullptr}, CudaGraphExec{nullptr}, + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { + urContextRetain(Context); + urDeviceRetain(Device); } /// The ur_exp_command_buffer_handle_t_ destructor releases @@ -43,6 +69,33 @@ 
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { cuGraphExecDestroy(CudaGraphExec); } +ur_exp_command_buffer_command_handle_t_:: + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, + std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, + const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr) + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), + WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { + CommandBuffer->incrementInternalReferenceCount(); + + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize); + std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize); + // Local work size may be nullptr + if (LocalWorkSizePtr) { + std::memcpy(LocalWorkSize, LocalWorkSizePtr, CopySize); + } else { + std::memset(LocalWorkSize, 0, sizeof(size_t) * 3); + } + + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); + std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); + } +} + /// Helper function for finding the Cuda Nodes associated with the /// commands in a command-buffer, each event is pointed to by a sync-point in /// the wait list. @@ -136,7 +189,7 @@ static ur_result_t enqueueCommandBufferFillHelper( // Get sync point and register the cuNode with it. *SyncPoint = - CommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + CommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } else { // CUDA has no memset functions that allow setting values more than 4 @@ -174,7 +227,7 @@ static ur_result_t enqueueCommandBufferFillHelper( CommandBuffer->Device->getContext())); // Get sync point and register the cuNode with it. 
- *SyncPoint = CommandBuffer->AddSyncPoint( + *SyncPoint = CommandBuffer->addSyncPoint( std::make_shared(GraphNode)); } } @@ -188,10 +241,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ur_exp_command_buffer_handle_t *phCommandBuffer) { - (void)pCommandBufferDesc; + + const bool IsUpdatable = + pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; try { - *phCommandBuffer = new ur_exp_command_buffer_handle_t_(hContext, hDevice); + *phCommandBuffer = + new ur_exp_command_buffer_handle_t_(hContext, hDevice, IsUpdatable); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -209,17 +265,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - hCommandBuffer->incrementReferenceCount(); + hCommandBuffer->incrementInternalReferenceCount(); + hCommandBuffer->incrementExternalReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - if (hCommandBuffer->decrementReferenceCount() != 0) - return UR_RESULT_SUCCESS; + if (hCommandBuffer->decrementExternalReferenceCount() == 0) { + // External ref count has reached zero, internal release of created + // commands. 
+ for (auto Command : hCommandBuffer->CommandHandles) { + commandHandleReleaseInternal(Command); + } + } - delete hCommandBuffer; - return UR_RESULT_SUCCESS; + return commandBufferReleaseInternal(hCommandBuffer); } UR_APIEXPORT ur_result_t UR_APICALL @@ -250,7 +311,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand) { // Preconditions UR_ASSERT(hCommandBuffer->Context == hKernel->getContext(), UR_RESULT_ERROR_INVALID_KERNEL); @@ -277,7 +339,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( DepsList.data(), DepsList.size())); // Get sync point and register the cuNode with it. - *pSyncPoint = hCommandBuffer->AddSyncPoint( + *pSyncPoint = hCommandBuffer->addSyncPoint( std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; @@ -324,8 +386,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel->clearLocalSize(); // Get sync point and register the cuNode with it. 
- *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + auto NodeSP = std::make_shared(GraphNode); + if (pSyncPoint) { + *pSyncPoint = hCommandBuffer->addSyncPoint(NodeSP); + } + + auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ + hCommandBuffer, hKernel, NodeSP, NodeParams, + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; + + NewCommand->incrementInternalReferenceCount(); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + *phCommand = NewCommand; + } + } catch (ur_result_t Err) { Result = Err; } @@ -359,7 +435,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -403,7 +479,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -444,7 +520,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -482,7 +558,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( // Get sync point and register the cuNode with it. 
*pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -519,7 +595,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -561,7 +637,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -603,7 +679,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -633,7 +709,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); setErrorMessage("Prefetch hint ignored and replaced with empty node as " "prefetch is not supported by CUDA Graph backend", @@ -668,7 +744,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( // Get sync point and register the cuNode with it. 
*pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); setErrorMessage("Memory advice ignored and replaced with empty node as " "memory advice is not supported by CUDA Graph backend", @@ -762,3 +838,190 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return Result; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t hCommand) { + hCommand->incrementExternalReferenceCount(); + hCommand->incrementInternalReferenceCount(); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t hCommand) { + hCommand->decrementExternalReferenceCount(); + return commandHandleReleaseInternal(hCommand); +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + // Update requires command-buffer to be finalized + ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; + if (!CommandBuffer->CudaGraphExec) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Update requires command-buffer to be created with update enabled + if (!CommandBuffer->IsUpdatable) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Kernel corresponding to the command to update + ur_kernel_handle_t Kernel = hCommand->Kernel; + + // Update pointer arguments to the kernel + uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = + pUpdateKernelLaunch->pNewPointerArgList; + for (uint32_t i = 0; i < NumPointerArgs; i++) { + const auto &PointerArgDesc = ArgPointerList[i]; + uint32_t ArgIndex = PointerArgDesc.argIndex; + const void *ArgValue = PointerArgDesc.pNewPointerArg; + + ur_result_t Result = 
UR_RESULT_SUCCESS; + try { + Kernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Update memobj arguments to the kernel + uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = + pUpdateKernelLaunch->pNewMemObjArgList; + for (uint32_t i = 0; i < NumMemobjArgs; i++) { + const auto &MemobjArgDesc = ArgMemobjList[i]; + uint32_t ArgIndex = MemobjArgDesc.argIndex; + ur_mem_handle_t ArgValue = MemobjArgDesc.hNewMemObjArg; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + if (ArgValue == nullptr) { + Kernel->setKernelArg(ArgIndex, 0, nullptr); + } else { + CUdeviceptr CuPtr = std::get(ArgValue->Mem).get(); + Kernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr); + } + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Update value arguments to the kernel + uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = + pUpdateKernelLaunch->pNewValueArgList; + for (uint32_t i = 0; i < NumValueArgs; i++) { + const auto &ValueArgDesc = ArgValueList[i]; + uint32_t ArgIndex = ValueArgDesc.argIndex; + size_t ArgSize = ValueArgDesc.argSize; + const void *ArgValue = ValueArgDesc.pNewValueArg; + + ur_result_t Result = UR_RESULT_SUCCESS; + + try { + Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Set the updated ND range + const uint32_t NewWorkDim = pUpdateKernelLaunch->newWorkDim; + if (NewWorkDim != 0) { + UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + hCommand->WorkDim = NewWorkDim; + } + + if (pUpdateKernelLaunch->pNewGlobalWorkOffset) { + hCommand->setGlobalOffset(pUpdateKernelLaunch->pNewGlobalWorkOffset); + } + + if 
(pUpdateKernelLaunch->pNewGlobalWorkSize) { + hCommand->setGlobalSize(pUpdateKernelLaunch->pNewGlobalWorkSize); + } + + if (pUpdateKernelLaunch->pNewLocalWorkSize) { + hCommand->setLocalSize(pUpdateKernelLaunch->pNewLocalWorkSize); + } + + size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; + size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + + const bool ProvidedLocalSize = hCommand->LocalWorkSize[0] != 0 || + hCommand->LocalWorkSize[1] != 0 || + hCommand->LocalWorkSize[2] != 0; + // If no worksize is provided make sure we pass nullptr to setKernelParams so + // it can guess the local work size. + size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; + uint32_t WorkDim = hCommand->WorkDim; + + // Set the number of threads per block to the number of threads per warp + // by default unless user has provided a better number + size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; + size_t BlocksPerGrid[3] = {1u, 1u, 1u}; + CUfunction CuFunc = Kernel->get(); + ur_context_handle_t Context = CommandBuffer->Context; + ur_device_handle_t Device = CommandBuffer->Device; + auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset, + GlobalWorkSize, LocalWorkSize, Kernel, CuFunc, + ThreadsPerBlock, BlocksPerGrid); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + + CUDA_KERNEL_NODE_PARAMS &Params = hCommand->Params; + + Params.func = CuFunc; + Params.gridDimX = BlocksPerGrid[0]; + Params.gridDimY = BlocksPerGrid[1]; + Params.gridDimZ = BlocksPerGrid[2]; + Params.blockDimX = ThreadsPerBlock[0]; + Params.blockDimY = ThreadsPerBlock[1]; + Params.blockDimZ = ThreadsPerBlock[2]; + Params.sharedMemBytes = Kernel->getLocalSize(); + Params.kernelParams = const_cast(Kernel->getArgIndices().data()); + + CUgraphNode Node = *(hCommand->Node); + CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec; + UR_CHECK_ERROR(cuGraphExecKernelNodeSetParams(CudaGraphExec, Node, &Params)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t 
UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + return ReturnValue(hCommandBuffer->getExternalReferenceCount()); + default: + assert(!"Command-buffer info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: + return ReturnValue(hCommand->getExternalReferenceCount()); + default: + assert(!"Command-buffer command info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 18264410c4..e2b09059bf 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -175,20 +175,91 @@ static inline const char *getUrResultString(ur_result_t Result) { fprintf(stderr, "UR <--- %s(%s)\n", #Call, getUrResultString(Result)); \ } +// Handle to a kernel command. +// +// Struct that stores all the information related to a kernel command in a +// command-buffer, such that the command can be recreated. When handles can +// be returned from other command types this struct will need refactored. 
+struct ur_exp_command_buffer_command_handle_t_ { + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, + std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, + const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr); + + void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); + } + } + + void setGlobalSize(const size_t *GlobalWorkSizePtr) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); + } + } + + void setLocalSize(const size_t *LocalWorkSizePtr) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(LocalWorkSize, LocalWorkSizePtr, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(LocalWorkSize + WorkDim, 0, ZeroSize); + } + } + + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } + + ur_exp_command_buffer_handle_t CommandBuffer; + ur_kernel_handle_t Kernel; + std::shared_ptr Node; + CUDA_KERNEL_NODE_PARAMS Params; + + uint32_t WorkDim; + size_t GlobalWorkOffset[3]; + size_t GlobalWorkSize[3]; + size_t LocalWorkSize[3]; + +private: + 
std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; +}; + struct ur_exp_command_buffer_handle_t_ { - ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext, - ur_device_handle_t hDevice); + ur_exp_command_buffer_handle_t_(ur_context_handle_t Context, + ur_device_handle_t Device, bool IsUpdatable); ~ur_exp_command_buffer_handle_t_(); - void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, + void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, std::shared_ptr CuNode) { SyncPoints[SyncPoint] = std::move(CuNode); NextSyncPoint++; } - ur_exp_command_buffer_sync_point_t GetNextSyncPoint() const { + ur_exp_command_buffer_sync_point_t getNextSyncPoint() const { return NextSyncPoint; } @@ -196,23 +267,46 @@ struct ur_exp_command_buffer_handle_t_ { // @param CuNode Node to register as next sync point // @return Pointer to the sync that registers the Node ur_exp_command_buffer_sync_point_t - AddSyncPoint(std::shared_ptr CuNode) { + addSyncPoint(std::shared_ptr CuNode) { ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint; - RegisterSyncPoint(SyncPoint, std::move(CuNode)); + registerSyncPoint(SyncPoint, std::move(CuNode)); return SyncPoint; } + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + uint32_t getInternalReferenceCount() const noexcept { + return RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } + // UR context associated with this command-buffer ur_context_handle_t Context; // Device associated with this command buffer ur_device_handle_t Device; + // Whether commands in the command-buffer can be updated + bool IsUpdatable; // Cuda 
Graph handle CUgraph CudaGraph; // Cuda Graph Exec handle CUgraphExec CudaGraphExec; // Atomic variable counting the number of reference to this command_buffer // using std::atomic prevents data race when incrementing/decrementing. - std::atomic_uint32_t RefCount; + std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; // Map of sync_points to ur_events std::unordered_map CommandHandles; }; diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 49feced282..b33ad6c792 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1035,6 +1035,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(true); + default: break; } diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index f31ffe6d87..670d6c02e9 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -294,6 +294,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 54a6fa2f4e..0d239bc432 100644 --- 
a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -46,7 +46,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -162,3 +163,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t, ur_exp_command_buffer_info_t, size_t, + void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t, + ur_exp_command_buffer_command_info_t, size_t, void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 08d4e87ae4..bc67fcee71 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -841,6 +841,10 @@ UR_APIEXPORT 
ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ASYNC_BARRIER: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: break; } diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 7707e78425..cc7a5e1e9f 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -291,6 +291,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 56ffbe0145..7dc2a42fd6 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -511,7 +511,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *GlobalWorkSize, const size_t *LocalWorkSize, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + ur_exp_command_buffer_sync_point_t *SyncPoint, + ur_exp_command_buffer_command_handle_t *) { // Lock automatically releases when this goes out of scope. 
std::scoped_lock Lock( Kernel->Mutex, Kernel->Program->Mutex); @@ -980,3 +981,41 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + return ReturnValue(uint32_t{hCommandBuffer->RefCount.load()}); + default: + assert(!"Command-buffer info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t, + ur_exp_command_buffer_command_info_t, size_t, void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index af80f1905f..918b04400a 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -915,7 +915,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ze2urResult(errc); return ReturnValue(UrRootDev); } - + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + return ReturnValue(true); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + 
return ReturnValue(false); default: urPrint("Unsupported ParamName in urGetDeviceInfo\n"); urPrint("ParamName=%d(0x%x)\n", ParamName, ParamName); diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 74d0706b31..6fdf197904 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -341,6 +341,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index 50b38c9d52..fde6c03b86 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -50,7 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -162,3 +163,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_sync_point_t *) { return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t, ur_exp_command_buffer_info_t, size_t, + void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t, + ur_exp_command_buffer_command_info_t, size_t, void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 68dafdfc1c..dfabfb81e5 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -308,6 +308,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, CASE_UR_UNSUPPORTED(UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH); case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: return ReturnValue(false); + + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: DIE_NO_IMPLEMENTATION; } diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 9408101927..0c48ee1fb3 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -283,6 +283,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL 
urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index 439246658a..3ca48743ac 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -4674,12 +4674,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4703,7 +4703,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4724,7 +4724,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4745,7 +4745,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4766,9 +4766,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. 
const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -4779,8 +4779,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4791,9 +4793,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, phCommand); } else { // generic implementation + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + d_context.get()); + } } return result; @@ -4805,16 +4812,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4873,7 +4880,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -4883,8 +4890,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4908,18 +4915,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. 
- ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4943,17 +4950,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. 
+ void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4977,7 +4984,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -4994,8 +5001,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5020,31 +5027,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5069,29 +5076,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5218,15 +5225,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
@@ -5250,6 +5256,137 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferRetainCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnRetainCommandExp = + d_context.urDdiTable.CommandBufferExp.pfnRetainCommandExp; + if (nullptr != pfnRetainCommandExp) { + result = pfnRetainCommandExp(hCommand); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferReleaseCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnReleaseCommandExp = + d_context.urDdiTable.CommandBufferExp.pfnReleaseCommandExp; + if (nullptr != pfnReleaseCommandExp) { + result = pfnReleaseCommandExp(hCommand); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnUpdateKernelLaunchExp = + d_context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr != pfnUpdateKernelLaunchExp) { + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnGetInfoExp = d_context.urDdiTable.CommandBufferExp.pfnGetInfoExp; + if (nullptr != 
pfnGetInfoExp) { + result = pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, + pPropSizeRet); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferCommandGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnCommandGetInfoExp = + d_context.urDdiTable.CommandBufferExp.pfnCommandGetInfoExp; + if (nullptr != pfnCommandGetInfoExp) { + result = pfnCommandGetInfoExp(hCommand, propName, propSize, pPropValue, + pPropSizeRet); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -5714,6 +5851,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; + pDdiTable->pfnRetainCommandExp = driver::urCommandBufferRetainCommandExp; + + pDdiTable->pfnReleaseCommandExp = driver::urCommandBufferReleaseCommandExp; + + pDdiTable->pfnUpdateKernelLaunchExp = + driver::urCommandBufferUpdateKernelLaunchExp; + + pDdiTable->pfnGetInfoExp = driver::urCommandBufferGetInfoExp; + + pDdiTable->pfnCommandGetInfoExp = driver::urCommandBufferCommandGetInfoExp; + return result; } catch (...) { return exceptionToResult(std::current_exception()); diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 74cdd8a03d..88c661b4ae 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -104,7 +104,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -356,3 +357,67 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + 
+UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, + [[maybe_unused]] const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clGetCommandBufferInfoKHRCache, + cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR); + + if (!clGetCommandBufferInfoKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + if (propName != UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pPropSizeRet) { + *pPropSizeRet = sizeof(cl_uint); + } + + cl_uint ref_count; + CL_RETURN_ON_FAILURE(clGetCommandBufferInfoKHR( + hCommandBuffer->CLCommandBuffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR, + sizeof(ref_count), &ref_count, nullptr)); + + if (pPropValue) { + if (propSize != sizeof(cl_uint)) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + static_assert(sizeof(cl_uint) == sizeof(uint32_t)); + *static_cast(pPropValue) = static_cast(ref_count); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, + [[maybe_unused]] ur_exp_command_buffer_command_info_t propName, + [[maybe_unused]] size_t propSize, 
[[maybe_unused]] void *pPropValue, + [[maybe_unused]] size_t *pPropSizeRet) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0cb19694a6..0667cd3d17 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -214,6 +214,7 @@ CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR"; CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; +CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; #undef CONSTFIX @@ -300,6 +301,10 @@ cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); +using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; @@ -338,6 +343,7 @@ struct ExtFuncPtrCacheT { FuncPtrCache clCommandCopyBufferRectKHRCache; FuncPtrCache clCommandFillBufferKHRCache; FuncPtrCache clEnqueueCommandBufferKHRCache; + FuncPtrCache clGetCommandBufferInfoKHRCache; }; // A raw pointer is used here since the lifetime of this map has to be tied to // piTeardown to avoid issues with static destruction order (a user application diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 5b0d5332db..115b9b2e09 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -951,6 +951,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ASYNC_BARRIER: { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } + + case 
UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + cl_device_id Dev = cl_adapter::cast(hDevice); + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + std::string SupportedExtensions(ExtStr.c_str()); + return ReturnValue(ExtStr.find("cl_khr_command_buffer") != + std::string::npos); + } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + return ReturnValue(false); + } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index ac2c33475b..eb64df5c6f 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -301,6 +301,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 5867d295ae..ca0c67b217 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5050,12 +5050,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for 
urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -5082,7 +5082,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -5107,7 +5107,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -5132,7 +5132,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -5158,9 +5158,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -5171,8 +5171,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -5190,15 +5192,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pLocalWorkSize, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &pSyncPoint, + &phCommand}; uint64_t instance = context.notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", ¶ms); ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", ¶ms, &result, @@ -5211,16 +5214,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -5294,7 +5297,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -5304,8 +5307,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -5343,18 +5346,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. 
+ ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -5391,17 +5394,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -5438,7 +5441,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -5455,8 +5458,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -5500,31 +5503,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -5568,29 +5571,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. 
size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -5774,15 +5777,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -5809,6 +5811,165 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferRetainCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + auto pfnRetainCommandExp = + context.urDdiTable.CommandBufferExp.pfnRetainCommandExp; + + if (nullptr == pfnRetainCommandExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_retain_command_exp_params_t params = {&hCommand}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP, + "urCommandBufferRetainCommandExp", &params); + + ur_result_t result = pfnRetainCommandExp(hCommand); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP, + "urCommandBufferRetainCommandExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferReleaseCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command.
+) { + auto pfnReleaseCommandExp = + context.urDdiTable.CommandBufferExp.pfnReleaseCommandExp; + + if (nullptr == pfnReleaseCommandExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_release_command_exp_params_t params = {&hCommand}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP, + "urCommandBufferReleaseCommandExp", &params); + + ur_result_t result = pfnReleaseCommandExp(hCommand); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP, + "urCommandBufferReleaseCommandExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated.
+) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_update_kernel_launch_exp_params_t params = { + &hCommand, &pUpdateKernelLaunch}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", &params); + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +) { + auto pfnGetInfoExp = context.urDdiTable.CommandBufferExp.pfnGetInfoExp; + + if (nullptr == pfnGetInfoExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_get_info_exp_params_t params = { + &hCommandBuffer, &propName, &propSize, &pPropValue, &pPropSizeRet}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP, + "urCommandBufferGetInfoExp", &params); + + ur_result_t result = pfnGetInfoExp(hCommandBuffer, propName, propSize, + pPropValue, pPropSizeRet); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP, +
"urCommandBufferGetInfoExp", &params, &result, instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferCommandGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +) { + auto pfnCommandGetInfoExp = + context.urDdiTable.CommandBufferExp.pfnCommandGetInfoExp; + + if (nullptr == pfnCommandGetInfoExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_command_get_info_exp_params_t params = { + &hCommand, &propName, &propSize, &pPropValue, &pPropSizeRet}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP, + "urCommandBufferCommandGetInfoExp", &params); + + ur_result_t result = pfnCommandGetInfoExp(hCommand, propName, propSize, + pPropValue, pPropSizeRet); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP, + "urCommandBufferCommandGetInfoExp", &params, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -6389,6 +6550,25 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = 
ur_tracing_layer::urCommandBufferEnqueueExp; +
dditable.pfnRetainCommandExp = pDdiTable->pfnRetainCommandExp; + pDdiTable->pfnRetainCommandExp = + ur_tracing_layer::urCommandBufferRetainCommandExp; + + dditable.pfnReleaseCommandExp = pDdiTable->pfnReleaseCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur_tracing_layer::urCommandBufferReleaseCommandExp; + + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_tracing_layer::urCommandBufferUpdateKernelLaunchExp; + + dditable.pfnGetInfoExp = pDdiTable->pfnGetInfoExp; + pDdiTable->pfnGetInfoExp = ur_tracing_layer::urCommandBufferGetInfoExp; + + dditable.pfnCommandGetInfoExp = pDdiTable->pfnCommandGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur_tracing_layer::urCommandBufferCommandGetInfoExp; + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index e4212212b4..5bac5253e0 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -7629,12 +7629,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. 
ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -7676,7 +7676,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -7699,7 +7699,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -7722,7 +7722,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -7745,9 +7745,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -7758,8 +7758,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -7805,8 +7807,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); return result; } @@ -7815,16 +7817,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7939,7 +7941,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. 
+ hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7949,8 +7951,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -8002,18 +8004,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -8060,17 +8062,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -8117,7 +8119,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -8134,8 +8136,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -8188,31 +8190,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -8260,29 +8262,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -8499,15 +8501,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
@@ -8555,6 +8556,184 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferRetainCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + auto pfnRetainCommandExp = + context.urDdiTable.CommandBufferExp.pfnRetainCommandExp; + + if (nullptr == pfnRetainCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + } + + ur_result_t result = pfnRetainCommandExp(hCommand); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferReleaseCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + auto pfnReleaseCommandExp = + context.urDdiTable.CommandBufferExp.pfnReleaseCommandExp; + + if (nullptr == pfnReleaseCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + } + + ur_result_t result = pfnReleaseCommandExp(hCommand); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. 
+ const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pUpdateKernelLaunch) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +) { + auto pfnGetInfoExp = context.urDdiTable.CommandBufferExp.pfnGetInfoExp; + + if (nullptr == pfnGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (propSize != 0 && pPropValue == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (pPropValue == NULL && pPropSizeRet == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT < propName) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (propSize == 0 && pPropValue 
!= NULL) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnGetInfoExp(hCommandBuffer, propName, propSize, + pPropValue, pPropSizeRet); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferCommandGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +) { + auto pfnCommandGetInfoExp = + context.urDdiTable.CommandBufferExp.pfnCommandGetInfoExp; + + if (nullptr == pfnCommandGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (propSize != 0 && pPropValue == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (pPropValue == NULL && pPropSizeRet == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT < propName) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (propSize == 0 && pPropValue != NULL) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnCommandGetInfoExp(hCommand, propName, propSize, + pPropValue, pPropSizeRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL 
urEnqueueCooperativeKernelLaunchExp( @@ -9263,6 +9442,25 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; + dditable.pfnRetainCommandExp = pDdiTable->pfnRetainCommandExp; + pDdiTable->pfnRetainCommandExp = + ur_validation_layer::urCommandBufferRetainCommandExp; + + dditable.pfnReleaseCommandExp = pDdiTable->pfnReleaseCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur_validation_layer::urCommandBufferReleaseCommandExp; + + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_validation_layer::urCommandBufferUpdateKernelLaunchExp; + + dditable.pfnGetInfoExp = pDdiTable->pfnGetInfoExp; + pDdiTable->pfnGetInfoExp = ur_validation_layer::urCommandBufferGetInfoExp; + + dditable.pfnCommandGetInfoExp = pDdiTable->pfnCommandGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur_validation_layer::urCommandBufferCommandGetInfoExp; + return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 43326ff5d0..822448cae8 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -32,6 +32,7 @@ ur_exp_image_mem_factory_t ur_exp_image_mem_factory; ur_exp_interop_mem_factory_t ur_exp_interop_mem_factory; ur_exp_interop_semaphore_factory_t ur_exp_interop_semaphore_factory; ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; +ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet @@ -6526,12 +6527,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL 
urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6572,7 +6573,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6600,7 +6601,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6628,7 +6629,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6656,9 +6657,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6669,8 +6670,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6693,10 +6696,26 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel = reinterpret_cast(hKernel)->handle; // forward to device-platform - result = pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + result = pfnAppendKernelLaunchExp( + hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + ur_exp_command_buffer_command_factory.getInstance( + *phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -6705,16 +6724,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6787,7 +6806,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6797,8 +6816,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6835,18 +6854,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. 
+ hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6880,17 +6899,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6924,7 +6943,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6941,8 +6960,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6980,31 +6999,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
+ hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7039,29 +7058,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7222,15 +7241,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7283,6 +7301,194 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferRetainCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnRetainCommandExp = + dditable->ur.CommandBufferExp.pfnRetainCommandExp; + if (nullptr == pfnRetainCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // forward to device-platform + result = pfnRetainCommandExp(hCommand); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferReleaseCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnReleaseCommandExp = + dditable->ur.CommandBufferExp.pfnReleaseCommandExp; + if (nullptr == pfnReleaseCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // forward to device-platform + result = pfnReleaseCommandExp(hCommand); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnUpdateKernelLaunchExp = + dditable->ur.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // Deal with any struct parameters that have handle members we need to convert. + auto pUpdateKernelLaunchLocal = *pUpdateKernelLaunch; + + std::vector + pUpdateKernelLaunchpNewMemObjArgList; + for (uint32_t i = 0; i < pUpdateKernelLaunch->numNewMemObjArgs; i++) { + ur_exp_command_buffer_update_memobj_arg_desc_t NewRangeStruct = + pUpdateKernelLaunchLocal.pNewMemObjArgList[i]; + if (NewRangeStruct.hNewMemObjArg) { + NewRangeStruct.hNewMemObjArg = reinterpret_cast( + NewRangeStruct.hNewMemObjArg) + ->handle; + } + + pUpdateKernelLaunchpNewMemObjArgList.push_back(NewRangeStruct); + } + pUpdateKernelLaunchLocal.pNewMemObjArgList = + pUpdateKernelLaunchpNewMemObjArgList.data(); + + // Now that we've converted all the members update the param pointers + pUpdateKernelLaunch = &pUpdateKernelLaunchLocal; + + // forward to device-platform + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + 
pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnGetInfoExp = dditable->ur.CommandBufferExp.pfnGetInfoExp; + if (nullptr == pfnGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, + pPropSizeRet); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferCommandGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnCommandGetInfoExp = + dditable->ur.CommandBufferExp.pfnCommandGetInfoExp; + if (nullptr == pfnCommandGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // forward to device-platform + result = pfnCommandGetInfoExp(hCommand, propName, propSize, pPropValue, + pPropSizeRet); + + return result; +} + 
/////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -7898,6 +8104,15 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_loader::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = ur_loader::urCommandBufferEnqueueExp; + pDdiTable->pfnRetainCommandExp = + ur_loader::urCommandBufferRetainCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur_loader::urCommandBufferReleaseCommandExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_loader::urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = ur_loader::urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur_loader::urCommandBufferCommandGetInfoExp; } else { // return pointers directly to platform's DDIs *pDdiTable = ur_loader::context->platforms.front() diff --git a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp index 4edbabbd8b..d98b99a655 100644 --- a/source/loader/ur_ldrddi.hpp +++ b/source/loader/ur_ldrddi.hpp @@ -92,6 +92,12 @@ using ur_exp_command_buffer_factory_t = singleton_factory_t; +using ur_exp_command_buffer_command_object_t = + object_t; +using ur_exp_command_buffer_command_factory_t = + singleton_factory_t; + } // namespace ur_loader #endif /* UR_LOADER_LDRDDI_H */ diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index cd4a70c91e..1e9400aaa4 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7016,7 +7016,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7033,12 +7033,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) try { auto pfnCreateExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnCreateExp; @@ -7066,7 +7066,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnRetainExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnRetainExp; @@ -7095,7 +7095,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) try { auto pfnReleaseExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -7124,7 +7124,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnFinalizeExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -7138,7 +7138,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7165,9 +7165,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -7178,8 +7178,10 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendKernelLaunchExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -7190,13 +7192,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7221,16 +7223,16 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendUSMMemcpyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7246,7 +7248,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7301,7 +7303,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7322,7 +7324,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7332,8 +7334,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferCopyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -7349,7 +7351,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7371,18 +7373,18 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -7398,7 +7400,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7420,17 +7422,17 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -7446,7 +7448,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7467,7 +7469,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -7484,8 +7486,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { auto pfnAppendMemBufferCopyRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7503,7 +7505,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7525,31 +7527,31 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. 
void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7567,7 +7569,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7589,29 +7591,29 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. 
size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7629,7 +7631,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7681,7 +7683,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. 
/// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -7737,7 +7739,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -7814,15 +7816,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7839,6 +7840,203 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Increment the command object's reference count. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. + ) try { + auto pfnRetainCommandExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnRetainCommandExp; + if (nullptr == pfnRetainCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnRetainCommandExp(hCommand); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Decrement the command object's reference count and delete the command +/// object if the reference count becomes zero. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. + ) try { + auto pfnReleaseCommandExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnReleaseCommandExp; + if (nullptr == pfnReleaseCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnReleaseCommandExp(hCommand); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command in a finalized command-buffer. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + auto pfnUpdateKernelLaunchExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer object information. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property + ) try { + auto pfnGetInfoExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnGetInfoExp; + if (nullptr == pfnGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, + pPropSizeRet); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer command object information. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property + ) try { + auto pfnCommandGetInfoExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnCommandGetInfoExp; + if (nullptr == pfnCommandGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnCommandGetInfoExp(hCommand, propName, propSize, pPropValue, + pPropSizeRet); +}
catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 4503999c50..1d8b3ca9af 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -938,6 +938,22 @@ ur_result_t urPrintExpInteropSemaphoreDesc( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpCommandBufferInfo(enum ur_exp_command_buffer_info_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferCommandInfo( + enum ur_exp_command_buffer_command_info_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpCommandBufferDesc(const struct ur_exp_command_buffer_desc_t params, char *buffer, const size_t buff_size, @@ -947,6 +963,46 @@ urPrintExpCommandBufferDesc(const struct ur_exp_command_buffer_desc_t params, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpCommandBufferUpdateMemobjArgDesc( + const struct ur_exp_command_buffer_update_memobj_arg_desc_t params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferUpdatePointerArgDesc( + const struct ur_exp_command_buffer_update_pointer_arg_desc_t params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferUpdateValueArgDesc( + const struct ur_exp_command_buffer_update_value_arg_desc_t params, + char *buffer, 
const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferUpdateExecInfoDesc( + const struct ur_exp_command_buffer_update_exec_info_desc_t params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferUpdateKernelLaunchDesc( + const struct ur_exp_command_buffer_update_kernel_launch_desc_t params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, const size_t buff_size, size_t *out_size) { std::stringstream ss; @@ -1279,6 +1335,46 @@ ur_result_t urPrintCommandBufferEnqueueExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintCommandBufferRetainCommandExpParams( + const struct ur_command_buffer_retain_command_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferReleaseCommandExpParams( + const struct ur_command_buffer_release_command_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferUpdateKernelLaunchExpParams( + const struct ur_command_buffer_update_kernel_launch_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferGetInfoExpParams( + const struct ur_command_buffer_get_info_exp_params_t *params, char *buffer, + const size_t 
buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferCommandGetInfoExpParams( + const struct ur_command_buffer_command_get_info_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintContextCreateParams(const struct ur_context_create_params_t *params, char *buffer, const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 26f24aba08..5ee68ce529 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -5952,7 +5952,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -5969,12 +5969,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -5995,7 +5995,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6017,7 +6017,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6039,14 +6039,14 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6073,9 +6073,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. 
+ uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6086,15 +6086,17 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6119,23 +6121,23 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6181,7 +6183,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6202,7 +6204,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6212,15 +6214,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6242,25 +6244,25 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6282,24 +6284,24 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6320,7 +6322,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6337,15 +6339,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6367,38 +6369,38 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6420,36 +6422,36 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6492,7 +6494,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -6539,7 +6541,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -6607,15 +6609,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -6624,6 +6625,166 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Increment the command object's reference count. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Decrement the command object's reference count and delete the command +/// object if the reference count becomes zero. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command in a finalized command-buffer. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. 
+/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer object information. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. 
+/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer command object information. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. 
+/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index 44f9c43efb..ac48f3a313 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -118,6 +118,7 @@ if(UR_DPCXX) add_subdirectory(kernel) add_subdirectory(program) add_subdirectory(enqueue) + add_subdirectory(exp_command_buffer) else() message(WARNING "UR_DPCXX is not defined, the following conformance test executables \ diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 202109c3b5..450733d5ed 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -94,6 +94,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp) 
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/usm_ll.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp) set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h) add_custom_command(OUTPUT ${KERNEL_HEADER} diff --git a/test/conformance/device_code/indexers_usm.cpp b/test/conformance/device_code/indexers_usm.cpp new file mode 100644 index 0000000000..76b0751730 --- /dev/null +++ b/test/conformance/device_code/indexers_usm.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + const cl::sycl::range<3> global_range(8, 8, 8); + const cl::sycl::range<3> local_range(2, 2, 2); + const cl::sycl::id<3> global_offset(4, 4, 4); + const cl::sycl::nd_range<3> nd_range(global_range, local_range, + global_offset); + + cl::sycl::queue sycl_queue; + const size_t elements_per_work_item = 6; + int *ptr = cl::sycl::malloc_shared(global_range[0] * global_range[1] * + global_range[2] * + elements_per_work_item, + sycl_queue); + + sycl_queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for( + nd_range, [ptr](cl::sycl::nd_item<3> index) { + int *wi_ptr = + ptr + index.get_global_linear_id() * elements_per_work_item; + + wi_ptr[0] = index.get_global_id(0); + wi_ptr[1] = index.get_global_id(1); + wi_ptr[2] = index.get_global_id(2); + + wi_ptr[3] = index.get_local_id(0); + wi_ptr[4] = index.get_local_id(1); + wi_ptr[5] = index.get_local_id(2); + }); + }); + return 0; +} diff --git a/test/conformance/device_code/saxpy.cpp b/test/conformance/device_code/saxpy.cpp new file mode 100644 index 0000000000..593e8e2435 --- /dev/null +++ b/test/conformance/device_code/saxpy.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel 
Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + std::vector X(array_size, 1); + std::vector Y(array_size, 2); + std::vector Z(array_size, 0); + uint32_t A = 42; + auto x_buff = + cl::sycl::buffer(X.data(), cl::sycl::range<1>(array_size)); + auto y_buff = + cl::sycl::buffer(Y.data(), cl::sycl::range<1>(array_size)); + auto z_buff = + cl::sycl::buffer(Z.data(), cl::sycl::range<1>(array_size)); + + cl::sycl::queue sycl_queue; + sycl_queue.submit([&](cl::sycl::handler &cgh) { + auto x_acc = x_buff.get_access(cgh); + auto y_acc = y_buff.get_access(cgh); + auto z_acc = z_buff.get_access(cgh); + cgh.parallel_for(cl::sycl::range<1>{array_size}, + [=](cl::sycl::item<1> itemId) { + auto i = itemId.get_id(0); + z_acc[i] = A * x_acc[i] + y_acc[i]; + }); + }); + return 0; +} diff --git a/test/conformance/device_code/saxpy_usm.cpp b/test/conformance/device_code/saxpy_usm.cpp new file mode 100644 index 0000000000..8772a7e25d --- /dev/null +++ b/test/conformance/device_code/saxpy_usm.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + + cl::sycl::queue sycl_queue; + uint32_t *X = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Y = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Z = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t A = 42; + + sycl_queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for(cl::sycl::range<1>{array_size}, + [=](cl::sycl::item<1> itemId) { + auto i = itemId.get_id(0); + Z[i] = A * X[i] + Y[i]; + }); + }); + return 0; +} diff --git a/test/conformance/exp_command_buffer/CMakeLists.txt b/test/conformance/exp_command_buffer/CMakeLists.txt new file mode 100644 index 0000000000..a8ecf793ab --- /dev/null +++ b/test/conformance/exp_command_buffer/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +add_conformance_test_with_kernels_environment(exp_command_buffer + buffer_fill_kernel_update.cpp + usm_fill_kernel_update.cpp + buffer_saxpy_kernel_update.cpp + usm_saxpy_kernel_update.cpp + ndrange_update.cpp + release.cpp + retain.cpp + invalid_update.cpp +) diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp new file mode 100644 index 0000000000..ea5295dc6b --- /dev/null +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -0,0 +1,404 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" + +// Test that updating a command-buffer with a single kernel command +// taking buffer & scalar arguments works correctly.
+struct BufferFillCommandTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = "fill"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + // First argument is buffer to fill (will also be hidden accessor arg) + AddBuffer1DArg(sizeof(val) * global_size, &buffer); + // Second argument is scalar to fill with. + AddPodArg(val); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (new_buffer) { + EXPECT_SUCCESS(urMemRelease(new_buffer)); + } + + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t buffer_size = sizeof(val) * global_size; + ur_mem_handle_t buffer = nullptr; + ur_mem_handle_t new_buffer = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(BufferFillCommandTest); + +// Update kernel arguments to fill with a new scalar value to a new output +// buffer. 
+TEST_P(BufferFillCommandTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, buffer_size, val); + + // Create a new buffer to update kernel output parameter to + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Set argument index zero as new buffer + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + // Set argument index 2 as new value to fill (index 1 is buffer accessor) + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + &new_output_desc, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); +
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + ValidateBuffer(new_buffer, buffer_size, new_val); +} + +// Test updating the global size so that the fill outputs to a larger buffer +TEST_P(BufferFillCommandTest, UpdateGlobalSize) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + size_t new_global_size = 64; + const size_t new_buffer_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + new_buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, new_buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + &new_output_desc, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(new_buffer, 
new_buffer_size, val); +} + +// Test updating the input & output kernel arguments and global +// size, by calling update individually for each of these configurations. +TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + size_t new_global_size = 64; + const size_t new_buffer_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + new_buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, new_buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t output_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + &new_output_desc, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &output_update_desc)); + + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + 
ur_exp_command_buffer_update_kernel_launch_desc_t input_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &input_update_desc)); + + ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + command_handle, &global_size_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(new_buffer, new_buffer_size, new_val); +} + +// Test calling update twice on the same command-handle updating the +// input value, and verifying that it's the second call which persists. 
+TEST_P(BufferFillCommandTest, OverrideUpdate) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + uint32_t first_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t first_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(first_val), // argSize + nullptr, // pProperties + &first_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t first_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &first_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &first_update_desc)); + + uint32_t second_val = -99; + ur_exp_command_buffer_update_value_arg_desc_t second_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(second_val), // argSize + nullptr, // pProperties + &second_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &second_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + 
}; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &second_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(buffer, sizeof(val) * global_size, second_val); +} + +// Test calling update with multiple ur_exp_command_buffer_update_value_arg_desc_t +// instances updating the same argument, and checking that the last one in the +// list persists. +TEST_P(BufferFillCommandTest, OverrideArgList) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + ur_exp_command_buffer_update_value_arg_desc_t input_descs[2]; + uint32_t first_val = 33; + input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(first_val), // argSize + nullptr, // pProperties + &first_val, // hArgValue + }; + + uint32_t second_val = -99; + input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(second_val), // argSize + nullptr, // pProperties + &second_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 2, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + input_descs, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &second_update_desc)); + + 
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(buffer, sizeof(val) * global_size, second_val); +} diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp new file mode 100644 index 0000000000..879b3a9bc6 --- /dev/null +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -0,0 +1,178 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" + +// Test that updating a command-buffer with a single kernel command +// taking buffer & scalar arguments works correctly. +struct BufferSaxpyKernelTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = "saxpy"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + const size_t allocation_size = sizeof(uint32_t) * global_size; + for (auto &buffer : buffers) { + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + allocation_size, nullptr, + &buffer)); + ASSERT_NE(buffer, nullptr); + + std::vector init(allocation_size); + uur::generateMemFillPattern(init); + + ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, true, 0, + allocation_size, init.data(), + 0, nullptr, nullptr)); + } + + // Index 0 is output buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0])); + // Index 1 is output accessor + struct { + size_t offsets[1] = {0}; + } accessor; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr, + &accessor)); + + // Index 2 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 2, sizeof(A), nullptr, &A)); + // Index 3 is X buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 
3, nullptr, buffers[1])); + + // Index 4 is X buffer accessor + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 4, sizeof(accessor), nullptr, + &accessor)); + // Index 5 is Y buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2])); + + // Index 6 is Y buffer accessor + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 6, sizeof(accessor), nullptr, + &accessor)); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + // Read back all `length` elements and check output[i] == A * X[i] + Y[i]. + void Validate(ur_mem_handle_t output, ur_mem_handle_t X, ur_mem_handle_t Y, + uint32_t A, size_t length) { + const size_t num_bytes = length * sizeof(uint32_t); // size in bytes + std::vector output_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, output, true, 0, num_bytes, + output_data.data(), 0, nullptr, + nullptr)); + + std::vector X_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, X, true, 0, num_bytes, X_data.data(), 0, nullptr, nullptr)); + + std::vector Y_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, Y, true, 0, num_bytes, Y_data.data(), 0, nullptr, nullptr)); + + for (size_t i = 0; i < length; i++) { + uint32_t result = A * X_data[i] + Y_data[i]; + ASSERT_EQ(result, output_data[i]); + } + } + + void TearDown() override { + for (auto &buffer : buffers) { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + } + + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + std::array buffers =
{nullptr, nullptr, nullptr, + nullptr}; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(BufferSaxpyKernelTest); + +TEST_P(BufferSaxpyKernelTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(buffers[0], buffers[1], buffers[2], A, global_size); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_input_descs[2]; + // New X at index 3 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + buffers[3], // hArgValue + }; + + // New Y at index 5 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 5, // argIndex + nullptr, // pProperties + buffers[4], // hArgValue + }; + + // A at index 2 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + new_input_descs, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_A_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); +
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate(buffers[0], buffers[3], buffers[4], new_A, global_size); +} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match new file mode 100644 index 0000000000..43b50bde41 --- /dev/null +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -0,0 +1,18 @@ +{{OPT}}{{Segmentation fault|Aborted}} +{{OPT}}BufferFillCommandTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.UpdateGlobalSize/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.OverrideUpdate/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.OverrideArgList/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMFillCommandTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMFillCommandTest.UpdateExecInfo/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferSaxpyKernelTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMSaxpyKernelTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update3D/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update2D/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update1D/AMD_HIP_BACKEND{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.Success/AMD_HIP_BACKEND{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/AMD_HIP_BACKEND{{.*}} +{{OPT}}urCommandBufferRetainCommandExpTest.Success/AMD_HIP_BACKEND{{.*}} 
+{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/AMD_HIP_BACKEND{{.*}} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match new file mode 100644 index 0000000000..b4991347ba --- /dev/null +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -0,0 +1 @@ +{{OPT}}{{Segmentation fault|Aborted}} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h new file mode 100644 index 0000000000..4e9bff35f9 --- /dev/null +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -0,0 +1,175 @@ +// Copyright (C) 2022-2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef UR_CONFORMANCE_COMMAND_BUFFER_FIXTURES_H_INCLUDED +#define UR_CONFORMANCE_COMMAND_BUFFER_FIXTURES_H_INCLUDED + +#include + +namespace uur { +namespace command_buffer { + +struct urCommandBufferExpTest : uur::urContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp()); + + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); + + std::unique_ptr returned_extensions(new char[returned_size]); + + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); + + std::string_view extensions_string(returned_extensions.get()); + bool command_buffer_support = + extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != + std::string::npos; + + if (!command_buffer_support) { + GTEST_SKIP() << "EXP command-buffer feature is not supported."; + } + + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, + sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + + // Create a command-buffer + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::TearDown()); + } + + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + ur_bool_t updatable_command_buffer_support = false; +}; + +struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); + + std::unique_ptr returned_extensions(new 
char[returned_size]); + + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); + + std::string_view extensions_string(returned_extensions.get()); + bool command_buffer_support = + extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != + std::string::npos; + + if (!command_buffer_support) { + GTEST_SKIP() << "EXP command-buffer feature is not supported."; + } + + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, + sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + + // Create a command-buffer + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::TearDown()); + } + + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + ur_bool_t updatable_command_buffer_support = false; +}; + +struct urUpdatableCommandBufferExpExecutionTest + : urCommandBufferExpExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest ::SetUp()); + + if (!updatable_command_buffer_support) { + GTEST_SKIP() << "Updating EXP command-buffers is not supported."; + } + + // Create a command-buffer with update enabled. 
+ ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + &updatable_cmd_buf_handle)); + ASSERT_NE(updatable_cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (updatable_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE( + urCommandBufferExpExecutionTest::TearDown()); + } + + ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; +}; + +struct urCommandBufferCommandExpTest + : urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + // Append 2 kernel commands to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle_2)); + ASSERT_NE(command_handle_2, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + if (command_handle_2) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle_2)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + + ur_exp_command_buffer_command_handle_t command_handle = nullptr; + ur_exp_command_buffer_command_handle_t command_handle_2 = nullptr; 
+}; +} // namespace command_buffer +} // namespace uur + +#endif // UR_CONFORMANCE_COMMAND_BUFFER_FIXTURES_H_INCLUDED diff --git a/test/conformance/exp_command_buffer/invalid_update.cpp b/test/conformance/exp_command_buffer/invalid_update.cpp new file mode 100644 index 0000000000..00cf04ea85 --- /dev/null +++ b/test/conformance/exp_command_buffer/invalid_update.cpp @@ -0,0 +1,161 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" +#include <cstring> + +// Negative tests that correct error codes are thrown on invalid update usage. +struct InvalidUpdateTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = "fill_usm"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Allocate USM pointer to fill + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + // Index 0 is output + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr)); + // Index 1 is input scalar + ASSERT_SUCCESS( + urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val)); + + // Append kernel command to command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + } + + void TearDown() override { + if (shared_ptr) { +
EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(val) * global_size; + void *shared_ptr = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(InvalidUpdateTest); + +// Test error code is returned if command-buffer not finalized +TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { + // Set new value to use for fill at kernel index 1 + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update command to command-buffer that has not been finalized + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +} + +// Test error code is returned if command-buffer not created with isUpdatable 
+TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) {
+    // Create a command-buffer without isUpdatable
+    ur_exp_command_buffer_handle_t test_cmd_buf_handle = nullptr;
+    ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr,
+                                            &test_cmd_buf_handle));
+    EXPECT_NE(test_cmd_buf_handle, nullptr);
+
+    // Append a kernel command to command-buffer and close command-buffer
+    ur_exp_command_buffer_command_handle_t test_command_handle = nullptr;
+    EXPECT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+        test_cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size,
+        &local_size, 0, nullptr, nullptr, &test_command_handle));
+    EXPECT_NE(test_command_handle, nullptr);
+
+    EXPECT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle));
+
+    // Set new value to use for fill at kernel index 1
+    uint32_t new_val = 33;
+    ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype
+        nullptr,         // pNext
+        1,               // argIndex
+        sizeof(new_val), // argSize
+        nullptr,         // pProperties
+        &new_val,        // hArgValue
+    };
+
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,         // pNext
+        0,               // numNewMemObjArgs
+        0,               // numNewPointerArgs
+        1,               // numNewValueArgs
+        0,               // numNewExecInfos
+        0,               // newWorkDim
+        nullptr,         // pNewMemObjArgList
+        nullptr,         // pNewPointerArgList
+        &new_input_desc, // pNewValueArgList
+        nullptr,         // pNewExecInfoList
+        nullptr,         // pNewGlobalWorkOffset
+        nullptr,         // pNewGlobalWorkSize
+        nullptr,         // pNewLocalWorkSize
+    };
+
+    // Update command to command-buffer that doesn't have updatable set should
+    // be an error
+    ur_result_t result =
+        urCommandBufferUpdateKernelLaunchExp(test_command_handle, &update_desc);
+    EXPECT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result);
+
+    if (test_command_handle) {
+        EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(test_command_handle));
+    }
+    if (test_cmd_buf_handle) {
+        EXPECT_SUCCESS(urCommandBufferReleaseExp(test_cmd_buf_handle));
+    }
+}
diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp
new file mode 100644
index 0000000000..e5631f9176
--- /dev/null
+++ b/test/conformance/exp_command_buffer/ndrange_update.cpp
@@ -0,0 +1,251 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+#include <array>
+#include <cstring>
+
+// Test updating a command-buffer with a single kernel command
+// in a way that changes the NDRange configuration.
+struct NDRangeUpdateTest
+    : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest {
+    void SetUp() override {
+        program_name = "indexers_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::SetUp());
+
+        ur_device_usm_access_capability_flags_t shared_usm_flags;
+        ASSERT_SUCCESS(
+            uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+        if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+            GTEST_SKIP() << "Shared USM is not supported.";
+        }
+
+        // Allocate a USM pointer for use as kernel output at index 0
+        ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                        allocation_size, &shared_ptr));
+        ASSERT_NE(shared_ptr, nullptr);
+        std::memset(shared_ptr, 0, allocation_size);
+
+        ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr));
+
+        // Add a 3 dimension kernel command to command-buffer and close
+        // command-buffer
+        ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+            updatable_cmd_buf_handle, kernel, n_dimensions,
+            global_offset.data(), global_size.data(), local_size.data(), 0,
+            nullptr, nullptr, &command_handle));
+        ASSERT_NE(command_handle, nullptr);
+
+        ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
+    }
+
+    // For each work-item the kernel prints the global id and local id in each
+    // of the 3 dimensions to an offset in the output based on global linear
+    // id.
+    void Validate(std::array<size_t, 3> global_size,
+                  std::array<size_t, 3> local_size,
+                  std::array<size_t, 3> global_offset) {
+        // DPC++ swaps the X & Z dimension for 3 Dimensional kernels
+        // between those set by user and SPIR-V builtins.
+        // See `ReverseRangeDimensionsForKernel()` in commands.cpp
+
+        std::swap(global_size[0], global_size[2]);
+        std::swap(local_size[0], local_size[2]);
+        std::swap(global_offset[0], global_offset[2]);
+
+        // Verify global ID and local ID of each work item
+        for (size_t x = 0; x < global_size[0]; x++) {
+            for (size_t y = 0; y < global_size[1]; y++) {
+                for (size_t z = 0; z < global_size[2]; z++) {
+                    // Row-major linearization with z as the fastest moving
+                    // dimension, so x strides over a full y-z plane.
+                    const size_t global_linear_id =
+                        z + (y * global_size[2]) +
+                        (x * global_size[1] * global_size[2]);
+                    int *wi_ptr = (int *)shared_ptr +
+                                  (elements_per_id * global_linear_id);
+
+                    const int global_id_x = wi_ptr[0];
+                    const int global_id_y = wi_ptr[1];
+                    const int global_id_z = wi_ptr[2];
+
+                    EXPECT_EQ(global_id_x, x + global_offset[0]);
+                    EXPECT_EQ(global_id_y, y + global_offset[1]);
+                    EXPECT_EQ(global_id_z, z + global_offset[2]);
+
+                    const int local_id_x = wi_ptr[3];
+                    const int local_id_y = wi_ptr[4];
+                    const int local_id_z = wi_ptr[5];
+
+                    EXPECT_EQ(local_id_x, x % local_size[0]);
+                    EXPECT_EQ(local_id_y, y % local_size[1]);
+                    EXPECT_EQ(local_id_z, z % local_size[2]);
+                }
+            }
+        }
+    }
+
+    void TearDown() override {
+        if (shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
+        }
+
+        if (command_handle) {
+            EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::TearDown());
+    }
+
+    static constexpr size_t elements_per_id = 6;
+    static constexpr size_t n_dimensions = 3;
+    static constexpr std::array<size_t, 3> global_size = {8, 8, 8};
+    static constexpr std::array<size_t, 3> local_size = {1, 2, 2};
+    static constexpr std::array<size_t, 3> global_offset = {0, 4, 4};
+    static constexpr size_t allocation_size = sizeof(int) * elements_per_id *
+                                              global_size[0] * global_size[1] *
+                                              global_size[2];
+    void *shared_ptr = nullptr;
+    ur_exp_command_buffer_command_handle_t command_handle = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(NDRangeUpdateTest);
+
+// Keep the kernel work dimensions as 3, and update local size and global
+// offset.
+TEST_P(NDRangeUpdateTest, Update3D) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate(global_size, local_size, global_offset);
+
+    // Set local size and global offset to update to
+    std::array<size_t, 3> new_local_size = {4, 2, 2};
+    std::array<size_t, 3> new_global_offset = {3, 2, 1};
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,                  // pNext
+        0,                        // numNewMemObjArgs
+        0,                        // numNewPointerArgs
+        0,                        // numNewValueArgs
+        0,                        // numNewExecInfos
+        3,                        // newWorkDim
+        nullptr,                  // pNewMemObjArgList
+        nullptr,                  // pNewPointerArgList
+        nullptr,                  // pNewValueArgList
+        nullptr,                  // pNewExecInfoList
+        new_global_offset.data(), // pNewGlobalWorkOffset
+        nullptr,                  // pNewGlobalWorkSize
+        new_local_size.data(),    // pNewLocalWorkSize
+    };
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    Validate(global_size, new_local_size, new_global_offset);
+}
+
+// Update the kernel work dimensions to 2, and update global size, local size,
+// and global offset to new values.
+TEST_P(NDRangeUpdateTest, Update2D) {
+    // Run command-buffer twice prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate(global_size, local_size, global_offset);
+
+    // Set ND-Range configuration to update to
+    std::array<size_t, 3> new_global_size = {6, 6, 1};
+    std::array<size_t, 3> new_local_size = {3, 3, 1};
+    std::array<size_t, 3> new_global_offset = {3, 3, 0};
+
+    // Set dimensions as 2
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,                  // pNext
+        0,                        // numNewMemObjArgs
+        0,                        // numNewPointerArgs
+        0,                        // numNewValueArgs
+        0,                        // numNewExecInfos
+        2,                        // newWorkDim
+        nullptr,                  // pNewMemObjArgList
+        nullptr,                  // pNewPointerArgList
+        nullptr,                  // pNewValueArgList
+        nullptr,                  // pNewExecInfoList
+        new_global_offset.data(), // pNewGlobalWorkOffset
+        new_global_size.data(),   // pNewGlobalWorkSize
+        new_local_size.data(),    // pNewLocalWorkSize
+    };
+
+    // Reset output to remove old values which will no longer have a
+    // work-item to overwrite them
+    std::memset(shared_ptr, 0, allocation_size);
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    Validate(new_global_size, new_local_size, new_global_offset);
+}
+
+// Update the kernel work dimensions to 1, and check that previously
+// set global size, local size, and global offset update accordingly.
+TEST_P(NDRangeUpdateTest, Update1D) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate(global_size, local_size, global_offset);
+
+    // Set dimensions to 1
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr, // pNext
+        0,       // numNewMemObjArgs
+        0,       // numNewPointerArgs
+        0,       // numNewValueArgs
+        0,       // numNewExecInfos
+        1,       // newWorkDim
+        nullptr, // pNewMemObjArgList
+        nullptr, // pNewPointerArgList
+        nullptr, // pNewValueArgList
+        nullptr, // pNewExecInfoList
+        nullptr, // pNewGlobalWorkOffset
+        nullptr, // pNewGlobalWorkSize
+        nullptr, // pNewLocalWorkSize
+    };
+
+    // Reset output to remove old values which will no longer have a
+    // work-item to overwrite them
+    std::memset(shared_ptr, 0, allocation_size);
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    std::array<size_t, 3> new_global_size = {global_size[0], 1, 1};
+    std::array<size_t, 3> new_local_size = {local_size[0], 1, 1};
+    std::array<size_t, 3> new_global_offset = {global_offset[0], 0, 0};
+    Validate(new_global_size, new_local_size, new_global_offset);
+}
diff --git a/test/conformance/exp_command_buffer/release.cpp b/test/conformance/exp_command_buffer/release.cpp
new file mode 100644
index 0000000000..47b6124f74
--- /dev/null
+++ b/test/conformance/exp_command_buffer/release.cpp
@@ -0,0 +1,80 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+
+using urCommandBufferReleaseExpTest =
+    uur::command_buffer::urCommandBufferExpTest;
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferReleaseExpTest);
+
+// Releasing a retained command-buffer lowers its reported reference count.
+TEST_P(urCommandBufferReleaseExpTest, Success) {
+    ASSERT_SUCCESS(urCommandBufferRetainExp(cmd_buf_handle));
+
+    uint32_t prev_ref_count = 0;
+    ASSERT_SUCCESS(
+        uur::GetObjectReferenceCount(cmd_buf_handle, prev_ref_count));
+
+    ASSERT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
+
+    uint32_t ref_count = 0;
+    ASSERT_SUCCESS(uur::GetObjectReferenceCount(cmd_buf_handle, ref_count));
+
+    ASSERT_GT(prev_ref_count, ref_count);
+}
+
+// A null command-buffer handle is rejected.
+TEST_P(urCommandBufferReleaseExpTest, InvalidNullHandle) {
+    ASSERT_EQ_RESULT(urCommandBufferReleaseExp(nullptr),
+                     UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
+
+using urCommandBufferReleaseCommandExpTest =
+    uur::command_buffer::urCommandBufferCommandExpTest;
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferReleaseCommandExpTest);
+
+// Releasing a retained command handle lowers its reported reference count.
+TEST_P(urCommandBufferReleaseCommandExpTest, Success) {
+    ASSERT_SUCCESS(urCommandBufferRetainCommandExp(command_handle));
+
+    uint32_t prev_ref_count = 0;
+    ASSERT_SUCCESS(
+        uur::GetObjectReferenceCount(command_handle, prev_ref_count));
+
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+
+    uint32_t ref_count = 0;
+    ASSERT_SUCCESS(uur::GetObjectReferenceCount(command_handle, ref_count));
+
+    ASSERT_GT(prev_ref_count, ref_count);
+}
+
+// The command-buffer is released before the command handle appended to it.
+TEST_P(urCommandBufferReleaseCommandExpTest, ReleaseCmdBufBeforeHandle) {
+    ASSERT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle));
+    updatable_cmd_buf_handle = nullptr;
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+    command_handle = nullptr;
+}
+
+// The command-buffer is released in between releasing two command handles.
+TEST_P(urCommandBufferReleaseCommandExpTest, ReleaseCmdBufMultipleHandles) {
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+    command_handle = nullptr;
+
+    ASSERT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle));
+    updatable_cmd_buf_handle = nullptr;
+
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle_2));
+    command_handle_2 = nullptr;
+}
+
+// A null command handle is rejected.
+TEST_P(urCommandBufferReleaseCommandExpTest, InvalidNullHandle) {
+    ASSERT_EQ_RESULT(urCommandBufferReleaseCommandExp(nullptr),
+                     UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
diff --git a/test/conformance/exp_command_buffer/retain.cpp b/test/conformance/exp_command_buffer/retain.cpp
new file mode 100644
index 0000000000..f2b716fa23
--- /dev/null
+++ b/test/conformance/exp_command_buffer/retain.cpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+
+using urCommandBufferRetainExpTest =
+    uur::command_buffer::urCommandBufferExpTest;
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferRetainExpTest);
+
+// Retaining a command-buffer raises its reported reference count.
+TEST_P(urCommandBufferRetainExpTest, Success) {
+    uint32_t prev_ref_count = 0;
+    ASSERT_SUCCESS(
+        uur::GetObjectReferenceCount(cmd_buf_handle, prev_ref_count));
+
+    ASSERT_SUCCESS(urCommandBufferRetainExp(cmd_buf_handle));
+
+    uint32_t ref_count = 0;
+    ASSERT_SUCCESS(uur::GetObjectReferenceCount(cmd_buf_handle, ref_count));
+
+    ASSERT_LT(prev_ref_count, ref_count);
+
+    ASSERT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
+}
+
+// A null command-buffer handle is rejected.
+TEST_P(urCommandBufferRetainExpTest, InvalidNullHandle) {
+    ASSERT_EQ_RESULT(urCommandBufferRetainExp(nullptr),
+                     UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
+
+using urCommandBufferRetainCommandExpTest =
+    uur::command_buffer::urCommandBufferCommandExpTest;
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferRetainCommandExpTest);
+
+// Retaining a command handle raises its reported reference count.
+TEST_P(urCommandBufferRetainCommandExpTest, Success) {
+    uint32_t prev_ref_count = 0;
+    ASSERT_SUCCESS(
+        uur::GetObjectReferenceCount(command_handle, prev_ref_count));
+
+    ASSERT_SUCCESS(urCommandBufferRetainCommandExp(command_handle));
+
+    uint32_t ref_count = 0;
+    ASSERT_SUCCESS(uur::GetObjectReferenceCount(command_handle, ref_count));
+
+    ASSERT_LT(prev_ref_count, ref_count);
+
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+}
+
+// A null command handle is rejected.
+TEST_P(urCommandBufferRetainCommandExpTest, InvalidNullHandle) {
+    ASSERT_EQ_RESULT(urCommandBufferRetainCommandExp(nullptr),
+                     UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp
new file mode 100644
index 0000000000..7e6cab6ee3
--- /dev/null
+++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp
@@ -0,0 +1,378 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+#include <cstring>
+
+// Test that updating a command-buffer with a single kernel command
+// taking USM arguments works correctly.
+struct USMFillCommandTest
+    : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest {
+    void SetUp() override {
+        program_name = "fill_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::SetUp());
+
+        ur_device_usm_access_capability_flags_t shared_usm_flags;
+        ASSERT_SUCCESS(
+            uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+        if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+            GTEST_SKIP() << "Shared USM is not supported.";
+        }
+
+        // Allocate USM pointer to fill
+        ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                        allocation_size, &shared_ptr));
+        ASSERT_NE(shared_ptr, nullptr);
+        std::memset(shared_ptr, 0, allocation_size);
+
+        // Index 0 is output
+        ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr));
+        // Index 1 is input scalar
+        ASSERT_SUCCESS(
+            urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val));
+
+        // Append kernel command to command-buffer and close command-buffer
+        ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+            updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset,
+            &global_size, &local_size, 0, nullptr, nullptr, &command_handle));
+        ASSERT_NE(command_handle, nullptr);
+
+        ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
+    }
+
+    void Validate(uint32_t *pointer, size_t length, uint32_t val) {
+        for (size_t i = 0; i < length; i++) {
+            ASSERT_EQ(pointer[i], val);
+        }
+    }
+
+    void TearDown() override {
+        if (shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
+        }
+
+        if (new_shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr));
+        }
+
+        if (command_handle) {
+            EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::TearDown());
+    }
+
+    static constexpr uint32_t val = 42;
+    static constexpr size_t local_size = 4;
+    static constexpr size_t global_size = 32;
+    static constexpr size_t global_offset = 0;
+    static constexpr size_t n_dimensions = 1;
+    static constexpr size_t allocation_size = sizeof(val) * global_size;
+    void *shared_ptr = nullptr;
+    void *new_shared_ptr = nullptr;
+    ur_exp_command_buffer_command_handle_t command_handle = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMFillCommandTest);
+
+// Test using a different global size to fill and larger USM output buffer
+TEST_P(USMFillCommandTest, UpdateParameters) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate((uint32_t *)shared_ptr, global_size, val);
+
+    // Allocate a new USM pointer of larger size
+    size_t new_global_size = 64;
+    const size_t new_allocation_size = sizeof(val) * new_global_size;
+    ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                    new_allocation_size, &new_shared_ptr));
+    ASSERT_NE(new_shared_ptr, nullptr);
+    std::memset(new_shared_ptr, 0, new_allocation_size);
+
+    // Set new USM pointer as kernel output at index 0
+    ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype
+        nullptr,         // pNext
+        0,               // argIndex
+        nullptr,         // pProperties
+        &new_shared_ptr, // pArgValue
+    };
+
+    // Set new value to use for fill at kernel index 1
+    uint32_t new_val = 33;
+    ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype
+        nullptr,         // pNext
+        1,               // argIndex
+        sizeof(new_val), // argSize
+        nullptr,         // pProperties
+        &new_val,        // hArgValue
+    };
+
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,          // pNext
+        0,                // numNewMemObjArgs
+        1,                // numNewPointerArgs
+        1,                // numNewValueArgs
+        0,                // numNewExecInfos
+        0,                // newWorkDim
+        nullptr,          // pNewMemObjArgList
+        &new_output_desc, // pNewPointerArgList
+        &new_input_desc,  // pNewValueArgList
+        nullptr,          // pNewExecInfoList
+        nullptr,          // pNewGlobalWorkOffset
+        &new_global_size, // pNewGlobalWorkSize
+        nullptr,          // pNewLocalWorkSize
+    };
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    Validate((uint32_t *)new_shared_ptr, new_global_size, new_val);
+}
+
+// Test updating the kernel execution info
+TEST_P(USMFillCommandTest, UpdateExecInfo) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate((uint32_t *)shared_ptr, global_size, val);
+
+    ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3];
+
+    // Update indirect access flag
+    bool indirect_access = false;
+    new_exec_info_descs[0] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
+        nullptr,                                 // pNext
+        UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, // propName
+        sizeof(indirect_access),                 // propSize
+        nullptr,                                 // pProperties
+        &indirect_access,                        // pPropValue
+    };
+
+    // Update cache config
+    ur_kernel_cache_config_t cache_config = UR_KERNEL_CACHE_CONFIG_DEFAULT;
+    new_exec_info_descs[1] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
+        nullptr,                          // pNext
+        UR_KERNEL_EXEC_INFO_CACHE_CONFIG, // propName
+        sizeof(cache_config),             // propSize
+        nullptr,                          // pProperties
+        &cache_config,                    // pPropValue
+    };
+
+    // Create a new USM allocation to set indirect access for
+    ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                    allocation_size, &new_shared_ptr));
+    ASSERT_NE(new_shared_ptr, nullptr);
+    void *pointers = {new_shared_ptr};
+    new_exec_info_descs[2] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
+        nullptr,                      // pNext
+        UR_KERNEL_EXEC_INFO_USM_PTRS, // propName
+        sizeof(pointers),             // propSize
+        nullptr,                      // pProperties
+        &pointers,                    // pPropValue
+    };
+
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,             // pNext
+        0,                   // numNewMemObjArgs
+        0,                   // numNewPointerArgs
+        0,                   // numNewValueArgs
+        3,                   // numNewExecInfos
+        0,                   // newWorkDim
+        nullptr,             // pNewMemObjArgList
+        nullptr,             // pNewPointerArgList
+        nullptr,             // pNewValueArgList
+        new_exec_info_descs, // pNewExecInfoList
+        nullptr,             // pNewGlobalWorkOffset
+        nullptr,             // pNewGlobalWorkSize
+        nullptr,             // pNewLocalWorkSize
+    };
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify results are correct, although exec info modifications should
+    // have no effect on output
+    Validate((uint32_t *)shared_ptr, global_size, val);
+}
+
+// Test updating a command-buffer with multiple USM fill kernel commands
+struct USMMultipleFillCommandTest
+    : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest {
+    void SetUp() override {
+        program_name = "fill_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::SetUp());
+
+        ur_device_usm_access_capability_flags_t shared_usm_flags;
+        ASSERT_SUCCESS(
+            uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+        if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+            GTEST_SKIP() << "Shared USM is not supported.";
+        }
+
+        // Create a single USM allocation which will be used by all kernels
+        // by accessing at pointer offsets
+        ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                        allocation_size, &shared_ptr));
+        ASSERT_NE(shared_ptr, nullptr);
+        std::memset(shared_ptr, 0, allocation_size);
+
+        // Append multiple kernel commands to command-buffer
+        for (size_t k = 0; k < num_kernels; k++) {
+            // Calculate offset into output allocation, and set as
+            // kernel output.
+            void *offset_ptr = (uint32_t *)shared_ptr + (k * elements);
+            ASSERT_SUCCESS(
+                urKernelSetArgPointer(kernel, 0, nullptr, &offset_ptr));
+
+            // Each kernel has a unique fill value
+            uint32_t fill_val = val + k;
+            ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(fill_val),
+                                               nullptr, &fill_val));
+
+            // Append kernel and store returned handle
+            ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+                updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset,
+                &elements, &local_size, 0, nullptr, nullptr,
+                &command_handles[k]));
+            ASSERT_NE(command_handles[k], nullptr);
+        }
+
+        ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
+    }
+
+    void TearDown() override {
+        if (shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
+        }
+
+        if (new_shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr));
+        }
+
+        for (auto &handle : command_handles) {
+            if (handle) {
+                EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(handle));
+            }
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::TearDown());
+    }
+
+    static constexpr uint32_t val = 42;
+    static constexpr size_t local_size = 4;
+    static constexpr size_t global_size = 64;
+    static constexpr size_t global_offset = 0;
+    static constexpr size_t n_dimensions = 1;
+    static constexpr size_t allocation_size = sizeof(val) * global_size;
+    static constexpr size_t num_kernels = 8;
+    static constexpr size_t elements = global_size / num_kernels;
+
+    void *shared_ptr = nullptr;
+    void *new_shared_ptr = nullptr;
+    std::array<ur_exp_command_buffer_command_handle_t, num_kernels>
+        command_handles{};
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultipleFillCommandTest);
+
+// Test updating all the kernel commands in the command-buffer
+TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    uint32_t *output = (uint32_t *)shared_ptr;
+    for (size_t i = 0; i < global_size; i++) {
+        const uint32_t expected = val + (i / elements);
+        ASSERT_EQ(expected, output[i]);
+    }
+
+    // Create a new USM allocation to update kernel outputs to
+    ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                    allocation_size, &new_shared_ptr));
+    ASSERT_NE(new_shared_ptr, nullptr);
+    std::memset(new_shared_ptr, 0, allocation_size);
+
+    // Update each kernel in the command-buffer.
+    uint32_t new_val = 33;
+    for (size_t k = 0; k < num_kernels; k++) {
+        // Update output pointer to an offset into new USM allocation
+        void *offset_ptr = (uint32_t *)new_shared_ptr + (k * elements);
+        ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = {
+            UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype
+            nullptr,     // pNext
+            0,           // argIndex
+            nullptr,     // pProperties
+            &offset_ptr, // pArgValue
+        };
+
+        // Update fill value
+        uint32_t new_fill_val = new_val + k;
+        ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = {
+            UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype
+            nullptr,              // pNext
+            1,                    // argIndex
+            sizeof(new_fill_val), // argSize
+            nullptr,              // pProperties
+            &new_fill_val,        // hArgValue
+        };
+
+        ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+            UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+            nullptr,          // pNext
+            0,                // numNewMemObjArgs
+            1,                // numNewPointerArgs
+            1,                // numNewValueArgs
+            0,                // numNewExecInfos
+            0,                // newWorkDim
+            nullptr,          // pNewMemObjArgList
+            &new_output_desc, // pNewPointerArgList
+            &new_input_desc,  // pNewValueArgList
+            nullptr,          // pNewExecInfoList
+            nullptr,          // pNewGlobalWorkOffset
+            nullptr,          // pNewGlobalWorkSize
+            nullptr,          // pNewLocalWorkSize
+        };
+
+        ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handles[k],
+                                                            &update_desc));
+    }
+
+    // Enqueue command-buffer again now that every kernel has been updated
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    uint32_t *updated_output = (uint32_t *)new_shared_ptr;
+    for (size_t i = 0; i < global_size; i++) {
+        uint32_t expected = new_val + (i / elements);
+        ASSERT_EQ(expected, updated_output[i]) << i;
+    }
+}
diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp
new file mode 100644
index 0000000000..b3f9f93fe1
--- /dev/null
+++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp
@@ -0,0 +1,164 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+#include <cstring>
+
+// Test that updating a command-buffer with a single kernel command
+// taking USM & scalar arguments works correctly.
+struct USMSaxpyKernelTest
+    : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest {
+    void SetUp() override {
+        program_name = "saxpy_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::SetUp());
+
+        ur_device_usm_access_capability_flags_t shared_usm_flags;
+        ASSERT_SUCCESS(
+            uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+        if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+            GTEST_SKIP() << "Shared USM is not supported.";
+        }
+
+        const size_t allocation_size = sizeof(uint32_t) * global_size;
+        for (auto &shared_ptr : shared_ptrs) {
+            ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                            allocation_size, &shared_ptr));
+            ASSERT_NE(shared_ptr, nullptr);
+
+            std::vector<uint8_t> pattern(allocation_size);
+            uur::generateMemFillPattern(pattern);
+            std::memcpy(shared_ptr, pattern.data(), allocation_size);
+        }
+
+        // Index 0 is output
+        ASSERT_SUCCESS(
+            urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptrs[0]));
+        // Index 1 is A
+        ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A));
+        // Index 2 is X
+        ASSERT_SUCCESS(
+            urKernelSetArgPointer(kernel, 2, nullptr, &shared_ptrs[1]));
+        // Index 3 is Y
+        ASSERT_SUCCESS(
+            urKernelSetArgPointer(kernel, 3, nullptr, &shared_ptrs[2]));
+
+        // Append kernel command to command-buffer and close command-buffer
+        ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+            updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset,
+            &global_size, &local_size, 0, nullptr, nullptr, &command_handle));
+        ASSERT_NE(command_handle, nullptr);
+
+        ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
+    }
+
+    void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A,
+                  size_t length) {
+        for (size_t i = 0; i < length; i++) {
+            uint32_t result = A * X[i] + Y[i];
+            ASSERT_EQ(result, output[i]);
+        }
+    }
+
+    void TearDown() override {
+        for (auto &shared_ptr : shared_ptrs) {
+            if (shared_ptr) {
+                EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
+            }
+        }
+
+        if (command_handle) {
+            EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::TearDown());
+    }
+
+    static constexpr size_t local_size = 4;
+    static constexpr size_t global_size = 32;
+    static constexpr size_t global_offset = 0;
+    static constexpr size_t n_dimensions = 1;
+    static constexpr uint32_t A = 42;
+    std::array<void *, 5> shared_ptrs = {};
+    ur_exp_command_buffer_command_handle_t command_handle = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMSaxpyKernelTest);
+
+TEST_P(USMSaxpyKernelTest, UpdateParameters) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    uint32_t *output = (uint32_t *)shared_ptrs[0];
+    uint32_t *X = (uint32_t *)shared_ptrs[1];
+    uint32_t *Y = (uint32_t *)shared_ptrs[2];
+    Validate(output, X, Y, A, global_size);
+
+    // Update inputs
+    ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2];
+
+    // New X at index 2
+    new_input_descs[0] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype
+        nullptr,         // pNext
+        2,               // argIndex
+        nullptr,         // pProperties
+        &shared_ptrs[3], // pArgValue
+    };
+
+    // New Y at index 3
+    new_input_descs[1] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype
+        nullptr,         // pNext
+        3,               // argIndex
+        nullptr,         // pProperties
+        &shared_ptrs[4], // pArgValue
+    };
+
+    // New A at index 1
+    uint32_t new_A = 33;
+    ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype
+        nullptr,       // pNext
+        1,             // argIndex
+        sizeof(new_A), // argSize
+        nullptr,       // pProperties
+        &new_A,        // hArgValue
+    };
+
+    // Update kernel inputs
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,         // pNext
+        0,               // numNewMemObjArgs
+        2,               // numNewPointerArgs
+        1,               // numNewValueArgs
+        0,               // numNewExecInfos
+        0,               // newWorkDim
+        nullptr,         // pNewMemObjArgList
+        new_input_descs, // pNewPointerArgList
+        &new_A_desc,     // pNewValueArgList
+        nullptr,         // pNewExecInfoList
+        nullptr,         // pNewGlobalWorkOffset
+        nullptr,         // pNewGlobalWorkSize
+        nullptr,         // pNewLocalWorkSize
+    };
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    uint32_t *new_output = (uint32_t *)shared_ptrs[0];
+    uint32_t *new_X = (uint32_t *)shared_ptrs[3];
+    uint32_t *new_Y = (uint32_t *)shared_ptrs[4];
+    Validate(new_output, new_X, new_Y, new_A, global_size);
+}
diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h
index 4b7649559f..79620e4b11 100644
--- a/test/conformance/testing/include/uur/utils.h
+++ b/test/conformance/testing/include/uur/utils.h
@@ -118,6 +118,21 @@ auto GetPoolInfo =
         return GetInfo(pool, info, urUSMPoolGetInfo, out_value);
     };
 
+template <class T>
+auto GetCommandBufferInfo = [](ur_exp_command_buffer_handle_t cmd_buf,
+                               ur_exp_command_buffer_info_t info,
+                               T &out_value) {
+    return GetInfo(cmd_buf, info, urCommandBufferGetInfoExp, out_value);
+};
+
+template <class T>
+auto GetCommandBufferCommandInfo =
+    [](ur_exp_command_buffer_command_handle_t command,
+       ur_exp_command_buffer_command_info_t info, T &out_value) {
+        return GetInfo(command, info, urCommandBufferCommandGetInfoExp,
+                       out_value);
+    };
+
 template <class T>
 ur_result_t GetObjectReferenceCount(T object, uint32_t &out_ref_count) {
     if constexpr (std::is_same_v<T, ur_context_handle_t>) {
@@ -152,6 +167,16 @@ ur_result_t GetObjectReferenceCount(T object, uint32_t &out_ref_count) {
         return GetPoolInfo<uint32_t>(object, UR_USM_POOL_INFO_REFERENCE_COUNT,
                                      out_ref_count);
     }
+    if constexpr (std::is_same_v<T, ur_exp_command_buffer_handle_t>) {
+        return GetCommandBufferInfo<uint32_t>(
+            object, UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT, out_ref_count);
+    }
+    if constexpr (std::is_same_v<T, ur_exp_command_buffer_command_handle_t>) {
+        return GetCommandBufferCommandInfo<uint32_t>(
+            object, UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT,
+            out_ref_count);
+    }
+
     return UR_RESULT_ERROR_INVALID_VALUE;
 }
 
diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp
index ff024978ca..15894cafb8 100644
--- a/tools/urinfo/urinfo.hpp
+++ b/tools/urinfo/urinfo.hpp
@@ -329,6 +329,12 @@ inline void printDeviceInfos(ur_device_handle_t hDevice,
     printDeviceInfo<ur_device_handle_t>(hDevice,
                                         UR_DEVICE_INFO_COMPOSITE_DEVICE);
     std::cout << prefix;
+    printDeviceInfo<ur_bool_t>(hDevice,
+                               UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP);
+    std::cout << prefix;
+    printDeviceInfo<ur_bool_t>(
+        hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP);
+    std::cout << prefix;
     printDeviceInfo<ur_bool_t>(hDevice,
                                UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP);
     std::cout << prefix;