From a68244ab2b1d97b7900e00c0bf0a95a01ce6a405 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Fri, 9 Aug 2024 12:03:23 +0100 Subject: [PATCH] Add event update to command-buffers Expand the command-buffer experimental feature API so that it can be used to implement [SYCL-Graph dynamic events](https://github.com/reble/llvm/pull/372). This involves extending each command append entry-point to include the following extra parameters: * An output `ur_exp_command_buffer_command_handle_t`. * An input `ur_event_handle_t` event wait-list of dependent events. * An output `ur_event_handle_t` event that is signaled when the command completes its next execution. New entry-points are also added to update the wait-list and signal event parameters of commands: * `urCommandBufferUpdateSignalEventExp` * `urCommandBufferUpdateWaitEventsExp` The APIs are implemented for the CUDA adapter, with CTS tests. --- include/ur_api.h | 375 ++++- include/ur_ddi.h | 84 +- include/ur_print.h | 16 + include/ur_print.hpp | 492 +++++- scripts/core/EXP-COMMAND-BUFFER.rst | 160 +- scripts/core/exp-command-buffer.yml | 292 +++- scripts/core/registry.yml | 80 +- source/adapters/cuda/command_buffer.cpp | 547 ++++++- source/adapters/cuda/command_buffer.hpp | 242 ++- source/adapters/cuda/device.cpp | 1 + source/adapters/cuda/ur_interface_loader.cpp | 3 +- source/adapters/hip/command_buffer.cpp | 111 +- source/adapters/hip/device.cpp | 2 + source/adapters/hip/ur_interface_loader.cpp | 2 + source/adapters/level_zero/command_buffer.cpp | 112 +- source/adapters/level_zero/device.cpp | 10 +- .../level_zero/ur_interface_loader.cpp | 2 + source/adapters/mock/ur_mockddi.cpp | 447 ++++- source/adapters/native_cpu/command_buffer.cpp | 64 +- source/adapters/native_cpu/device.cpp | 1 + .../native_cpu/ur_interface_loader.cpp | 2 + source/adapters/opencl/command_buffer.cpp | 92 +- source/adapters/opencl/device.cpp | 2 + .../adapters/opencl/ur_interface_loader.cpp | 2 + source/loader/layers/tracing/ur_trcddi.cpp | 367 ++++- 
source/loader/layers/validation/ur_valddi.cpp | 472 +++++- source/loader/loader.def.in | 4 + source/loader/loader.map.in | 4 + source/loader/ur_ldrddi.cpp | 718 +++++++- source/loader/ur_libapi.cpp | 377 ++++- source/loader/ur_print.cpp | 16 + source/ur_api.cpp | 311 +++- .../exp_command_buffer/CMakeLists.txt | 2 + .../buffer_fill_kernel_update.cpp | 3 +- .../buffer_saxpy_kernel_update.cpp | 3 +- .../exp_command_buffer/commands.cpp | 28 +- .../exp_command_buffer/event_sync.cpp | 1438 +++++++++++++++++ .../event_sync_kernel_command.cpp | 531 ++++++ ...command_buffer_adapter_level_zero_v2.match | 146 +- ...xp_command_buffer_adapter_native_cpu.match | 7 + test/conformance/exp_command_buffer/fill.cpp | 12 +- .../conformance/exp_command_buffer/fixtures.h | 6 +- .../exp_command_buffer/invalid_update.cpp | 9 +- .../exp_command_buffer/ndrange_update.cpp | 2 +- .../usm_fill_kernel_update.cpp | 7 +- .../usm_saxpy_kernel_update.cpp | 7 +- tools/urinfo/urinfo.hpp | 3 + 47 files changed, 7049 insertions(+), 565 deletions(-) create mode 100644 test/conformance/exp_command_buffer/event_sync.cpp create mode 100644 test/conformance/exp_command_buffer/event_sync_kernel_command.cpp diff --git a/include/ur_api.h b/include/ur_api.h index cc693c50f6..bdd4547657 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -150,7 +150,6 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_RELEASE_EXP = 122, ///< Enumerator for ::urCommandBufferReleaseExp UR_FUNCTION_COMMAND_BUFFER_FINALIZE_EXP = 123, ///< Enumerator for ::urCommandBufferFinalizeExp UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP = 125, ///< Enumerator for ::urCommandBufferAppendKernelLaunchExp - UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP = 128, ///< Enumerator for ::urCommandBufferEnqueueExp UR_FUNCTION_USM_PITCHED_ALLOC_EXP = 132, ///< Enumerator for ::urUSMPitchedAllocExp UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP = 133, ///< Enumerator for ::urBindlessImagesUnsampledImageHandleDestroyExp 
UR_FUNCTION_BINDLESS_IMAGES_SAMPLED_IMAGE_HANDLE_DESTROY_EXP = 134, ///< Enumerator for ::urBindlessImagesSampledImageHandleDestroyExp @@ -199,17 +198,6 @@ typedef enum ur_function_t { UR_FUNCTION_LOADER_CONFIG_SET_CODE_LOCATION_CALLBACK = 200, ///< Enumerator for ::urLoaderConfigSetCodeLocationCallback UR_FUNCTION_LOADER_INIT = 201, ///< Enumerator for ::urLoaderInit UR_FUNCTION_LOADER_TEAR_DOWN = 202, ///< Enumerator for ::urLoaderTearDown - UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 203, ///< Enumerator for ::urCommandBufferAppendUSMMemcpyExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP = 204, ///< Enumerator for ::urCommandBufferAppendUSMFillExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 205, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 206, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP = 207, ///< Enumerator for ::urCommandBufferAppendMemBufferReadExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP = 208, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyRectExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 209, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 210, ///< Enumerator for ::urCommandBufferAppendMemBufferReadRectExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 211, ///< Enumerator for ::urCommandBufferAppendMemBufferFillExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP = 212, ///< Enumerator for ::urCommandBufferAppendUSMPrefetchExp - UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213, ///< Enumerator for ::urCommandBufferAppendUSMAdviseExp UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214, ///< Enumerator for ::urEnqueueCooperativeKernelLaunchExp UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215, ///< Enumerator for 
::urKernelSuggestMaxCooperativeGroupCountExp UR_FUNCTION_PROGRAM_GET_GLOBAL_VARIABLE_POINTER = 216, ///< Enumerator for ::urProgramGetGlobalVariablePointer @@ -227,7 +215,21 @@ typedef enum ur_function_t { UR_FUNCTION_ENQUEUE_NATIVE_COMMAND_EXP = 228, ///< Enumerator for ::urEnqueueNativeCommandExp UR_FUNCTION_LOADER_CONFIG_SET_MOCKING_ENABLED = 229, ///< Enumerator for ::urLoaderConfigSetMockingEnabled UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP = 230, ///< Enumerator for ::urBindlessImagesReleaseExternalMemoryExp - UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 231, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 231, ///< Enumerator for ::urCommandBufferAppendUSMMemcpyExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP = 232, ///< Enumerator for ::urCommandBufferAppendUSMFillExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 233, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 234, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP = 235, ///< Enumerator for ::urCommandBufferAppendMemBufferReadExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP = 236, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 237, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 238, ///< Enumerator for ::urCommandBufferAppendMemBufferReadRectExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 239, ///< Enumerator for ::urCommandBufferAppendMemBufferFillExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP = 240, ///< Enumerator for ::urCommandBufferAppendUSMPrefetchExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 241, ///< Enumerator for 
::urCommandBufferAppendUSMAdviseExp + UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP = 242, ///< Enumerator for ::urCommandBufferEnqueueExp + UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP = 243, ///< Enumerator for ::urCommandBufferUpdateSignalEventExp + UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp + UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1629,8 +1631,10 @@ typedef enum ur_device_info_t { ///< `EnqueueDeviceGlobalVariableRead` entry points. UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of ///< command-buffers. - UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel - ///< commands in a command-buffer. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating commands in + ///< a finalized command-buffer. + UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP = 0x1002, ///< [::ur_bool_t] Returns true if the device supports using event objects + ///< for command synchronization outside of a command-buffer. UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP = 0x1111, ///< [::ur_bool_t] return true if enqueue Cluster Launch is supported UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images @@ -8208,7 +8212,7 @@ typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t { ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. 
- const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optinal] Pointer to memory object properties. + const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optional] Pointer to memory object properties. ur_mem_handle_t hNewMemObjArg; ///< [in][optional] Handle of memory object to set at argument index. } ur_exp_command_buffer_update_memobj_arg_desc_t; @@ -8220,7 +8224,7 @@ typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t { ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. - const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optinal] Pointer to USM pointer properties. + const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optional] Pointer to USM pointer properties. const void *pNewPointerArg; ///< [in][optional] USM pointer to memory location holding the argument ///< value to set at argument index. @@ -8234,7 +8238,7 @@ typedef struct ur_exp_command_buffer_update_value_arg_desc_t { const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. uint32_t argSize; ///< [in] Argument size. - const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optinal] Pointer to value properties. + const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optional] Pointer to value properties. const void *pNewValueArg; ///< [in][optional] Argument value representing matching kernel arg type to ///< set at argument index. 
@@ -8391,6 +8395,13 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8404,7 +8415,14 @@ urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. 
); @@ -8430,6 +8448,13 @@ urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8441,7 +8466,15 @@ urCommandBufferAppendUSMMemcpyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. 
+ ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8468,6 +8501,13 @@ urCommandBufferAppendUSMMemcpyExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8480,7 +8520,15 @@ urCommandBufferAppendUSMFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] sync point associated with this command. 
+ ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8501,6 +8549,13 @@ urCommandBufferAppendUSMFillExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8514,7 +8569,15 @@ urCommandBufferAppendMemBufferCopyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. 
If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8536,6 +8599,13 @@ urCommandBufferAppendMemBufferCopyExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8548,7 +8618,15 @@ urCommandBufferAppendMemBufferWriteExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8570,6 +8648,13 @@ urCommandBufferAppendMemBufferWriteExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8582,7 +8667,15 @@ urCommandBufferAppendMemBufferReadExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. 
- ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8603,6 +8696,13 @@ urCommandBufferAppendMemBufferReadExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8620,7 +8720,15 @@ urCommandBufferAppendMemBufferCopyRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8642,6 +8750,13 @@ urCommandBufferAppendMemBufferCopyRectExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8662,7 +8777,15 @@ urCommandBufferAppendMemBufferWriteRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8684,6 +8807,13 @@ urCommandBufferAppendMemBufferWriteRectExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8703,7 +8833,15 @@ urCommandBufferAppendMemBufferReadRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8727,6 +8865,13 @@ urCommandBufferAppendMemBufferReadRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. 
+/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8740,7 +8885,15 @@ urCommandBufferAppendMemBufferFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. 
); /////////////////////////////////////////////////////////////////////////////// @@ -8771,6 +8924,13 @@ urCommandBufferAppendMemBufferFillExp( /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `size == 0` /// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8782,7 +8942,15 @@ urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. 
+ ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8813,6 +8981,13 @@ urCommandBufferAppendUSMPrefetchExp( /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `size == 0` /// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL @@ -8824,7 +8999,15 @@ urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] sync point associated with this command. 
+ ur_event_handle_t *phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// @@ -8897,9 +9080,7 @@ urCommandBufferReleaseCommandExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Update a kernel launch command in a finalized command-buffer. This -/// entry-point is synchronous and may block if the command-buffer is -/// executing when the entry-point is called. +/// @brief Update a kernel launch command in a finalized command-buffer. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8919,7 +9100,7 @@ urCommandBufferReleaseCommandExp( /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. -/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - "If `hCommand` is not a kernel execution command." /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE @@ -8935,6 +9116,68 @@ urCommandBufferUpdateKernelLaunchExp( const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. 
); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get a new event that will be signaled the next time the command in the +/// command-buffer executes. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phSignalEvent` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If COMMAND_BUFFER_EVENT_SUPPORT_EXP is not supported by the device associated with `hCommand`. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If no `phEvent` parameter was set on creation of the command associated with `hCommand`. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] Handle of the command-buffer command to update. + ur_event_handle_t *phSignalEvent ///< [out] Event to be signaled. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Set the list of wait events for a command to depend on to a list of +/// new events.
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If COMMAND_BUFFER_EVENT_SUPPORT_EXP is not supported by the device associated with `hCommand`. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// + If `numEventsInWaitList` does not match the number of wait events set when the command associated with `hCommand` was created. +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] Handle of the command-buffer command to update. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t *phEventWaitList ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. +); + /////////////////////////////////////////////////////////////////////////////// /// @brief Get command-buffer object information. 
/// @@ -11517,7 +11760,10 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { const size_t **ppLocalWorkSize; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_kernel_launch_exp_params_t; @@ -11532,7 +11778,11 @@ typedef struct ur_command_buffer_append_usm_memcpy_exp_params_t { size_t *psize; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_usm_memcpy_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11547,7 +11797,11 @@ typedef struct ur_command_buffer_append_usm_fill_exp_params_t { size_t *psize; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_usm_fill_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11563,7 +11817,11 @@ typedef struct ur_command_buffer_append_mem_buffer_copy_exp_params_t { size_t *psize; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + 
ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_mem_buffer_copy_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11578,7 +11836,11 @@ typedef struct ur_command_buffer_append_mem_buffer_write_exp_params_t { const void **ppSrc; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_mem_buffer_write_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11593,7 +11855,11 @@ typedef struct ur_command_buffer_append_mem_buffer_read_exp_params_t { void **ppDst; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_mem_buffer_read_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11613,7 +11879,11 @@ typedef struct ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t { size_t *pdstSlicePitch; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_mem_buffer_copy_rect_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11633,7 +11903,11 @@ typedef struct 
ur_command_buffer_append_mem_buffer_write_rect_exp_params_t { void **ppSrc; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_mem_buffer_write_rect_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11653,7 +11927,11 @@ typedef struct ur_command_buffer_append_mem_buffer_read_rect_exp_params_t { void **ppDst; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_mem_buffer_read_rect_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11669,7 +11947,11 @@ typedef struct ur_command_buffer_append_mem_buffer_fill_exp_params_t { size_t *psize; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_mem_buffer_fill_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11683,7 +11965,11 @@ typedef struct ur_command_buffer_append_usm_prefetch_exp_params_t { ur_usm_migration_flags_t *pflags; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; 
ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_usm_prefetch_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11697,7 +11983,11 @@ typedef struct ur_command_buffer_append_usm_advise_exp_params_t { ur_usm_advice_flags_t *padvice; uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_event_handle_t **pphEvent; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_usm_advise_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -11737,6 +12027,25 @@ typedef struct ur_command_buffer_update_kernel_launch_exp_params_t { const ur_exp_command_buffer_update_kernel_launch_desc_t **ppUpdateKernelLaunch; } ur_command_buffer_update_kernel_launch_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferUpdateSignalEventExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_update_signal_event_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; + ur_event_handle_t **pphSignalEvent; +} ur_command_buffer_update_signal_event_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferUpdateWaitEventsExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_update_wait_events_exp_params_t { + 
ur_exp_command_buffer_command_handle_t *phCommand; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; +} ur_command_buffer_update_wait_events_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferGetInfoExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 13785a2d65..43614d967a 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1933,7 +1933,10 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, + uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// @@ -1945,7 +1948,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMMemcpyExp_t)( size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendUSMFillExp @@ -1957,7 +1964,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMFillExp_t)( size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendMemBufferCopyExp @@ -1970,7 +1981,11 @@ typedef ur_result_t(UR_APICALL 
*ur_pfnCommandBufferAppendMemBufferCopyExp_t)( size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendMemBufferWriteExp @@ -1982,7 +1997,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferWriteExp_t)( const void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendMemBufferReadExp @@ -1994,7 +2013,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferReadExp_t)( void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendMemBufferCopyRectExp @@ -2011,7 +2034,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferCopyRectExp_t) size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendMemBufferWriteRectExp @@ -2028,7 +2055,11 
@@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferWriteRectExp_t void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendMemBufferReadRectExp @@ -2045,7 +2076,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferReadRectExp_t) void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendMemBufferFillExp @@ -2058,7 +2093,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendMemBufferFillExp_t)( size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendUSMPrefetchExp @@ -2069,7 +2108,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMPrefetchExp_t)( ur_usm_migration_flags_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for 
urCommandBufferAppendUSMAdviseExp @@ -2080,7 +2123,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMAdviseExp_t)( ur_usm_advice_flags_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + uint32_t, + const ur_event_handle_t *, + ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferEnqueueExp @@ -2107,6 +2154,19 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferUpdateKernelLaunchExp_t)( ur_exp_command_buffer_command_handle_t, const ur_exp_command_buffer_update_kernel_launch_desc_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferUpdateSignalEventExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferUpdateSignalEventExp_t)( + ur_exp_command_buffer_command_handle_t, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferUpdateWaitEventsExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferUpdateWaitEventsExp_t)( + ur_exp_command_buffer_command_handle_t, + uint32_t, + const ur_event_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferGetInfoExp typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferGetInfoExp_t)( @@ -2148,6 +2208,8 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferRetainCommandExp_t pfnRetainCommandExp; ur_pfnCommandBufferReleaseCommandExp_t pfnReleaseCommandExp; ur_pfnCommandBufferUpdateKernelLaunchExp_t pfnUpdateKernelLaunchExp; + ur_pfnCommandBufferUpdateSignalEventExp_t pfnUpdateSignalEventExp; + ur_pfnCommandBufferUpdateWaitEventsExp_t pfnUpdateWaitEventsExp; ur_pfnCommandBufferGetInfoExp_t pfnGetInfoExp; 
ur_pfnCommandBufferCommandGetInfoExp_t pfnCommandGetInfoExp; } ur_command_buffer_exp_dditable_t; diff --git a/include/ur_print.h b/include/ur_print.h index c70e661fb1..db36da6bf3 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -2450,6 +2450,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferReleaseCommandExpParams( /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferUpdateKernelLaunchExpParams(const struct ur_command_buffer_update_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_update_signal_event_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferUpdateSignalEventExpParams(const struct ur_command_buffer_update_signal_event_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_update_wait_events_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferUpdateWaitEventsExpParams(const struct ur_command_buffer_update_wait_events_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_command_buffer_get_info_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 9aeb5e3341..c09c1070b1 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -711,9 +711,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case 
UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP"; break; - case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP"; - break; case UR_FUNCTION_USM_PITCHED_ALLOC_EXP: os << "UR_FUNCTION_USM_PITCHED_ALLOC_EXP"; break; @@ -858,39 +855,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_LOADER_TEAR_DOWN: os << "UR_FUNCTION_LOADER_TEAR_DOWN"; break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP"; - break; - case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP: - os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP"; - break; case 
UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: os << "UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP"; break; @@ -942,6 +906,48 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP"; break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP"; + break; + case 
UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP"; + break; case UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP"; break; @@ -2544,6 +2550,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP"; break; + case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP"; break; @@ -4061,6 +4070,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -15930,12 +15951,35 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; 
ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + os << ", "; os << ".phCommand = "; @@ -15984,12 +16028,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16037,12 +16110,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16095,12 +16197,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << 
".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16148,12 +16279,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16201,12 +16361,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << 
".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16279,12 +16468,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16357,12 +16575,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16435,12 +16682,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] 
const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16493,12 +16769,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16541,12 +16846,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + 
ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16589,12 +16923,41 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + os << ", "; os << ".pSyncPoint = "; ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -16689,6 +17052,57 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_update_signal_event_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_update_signal_event_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".phSignalEvent = "; + + ur::details::printPtr(os, + *(params->pphSignalEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the 
ur_command_buffer_update_wait_events_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_update_wait_events_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_command_buffer_get_info_exp_params_t type /// @returns @@ -17963,6 +18377,12 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: { os << (const struct ur_command_buffer_update_kernel_launch_exp_params_t *)params; } break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP: { + os << (const struct ur_command_buffer_update_signal_event_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP: { + os << (const struct ur_command_buffer_update_wait_events_exp_params_t *)params; + } break; case UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP: { os << (const struct ur_command_buffer_get_info_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index c23519cf67..8aa839ceaa 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -78,6 +78,7 @@ Command-Buffers are tied to a specific ${x}_context_handle_t and ${x}_device_handle_t. 
${x}CommandBufferCreateExp optionally takes a descriptor to provide additional properties for how the command-buffer should be constructed. The members defined in ${x}_exp_command_buffer_desc_t are: + * ``isUpdatable``, which should be set to ``true`` to support :ref:`updating command-buffer commands`. * ``isInOrder``, which should be set to ``true`` to enable commands enqueued to @@ -95,12 +96,13 @@ Commands can be appended to a command-buffer by calling any of the command-buffer append functions. Typically these closely mimic the existing enqueue functions in the Core API in terms of their command-specific parameters. However, they differ in that they take a command-buffer handle instead of a -queue handle, and the dependencies and return parameters are sync-points instead -of event handles. +queue handle. Dependencies are also expressed differently, in that internal +command-buffer dependencies are expressed with sync-points, while event handles +are used to express synchronization external to the command-buffer. -The entry-point for appending a kernel launch command also returns an optional -handle to the command being appended. This handle can be used to update the -command configuration between command-buffer executions, see the section on +The entry-points for appending commands also return an optional handle to the +command being appended. This handle can be used to update the command +configuration between command-buffer executions, see the section on :ref:`updating command-buffer commands`. Currently only the following commands are supported: @@ -122,7 +124,7 @@ It is planned to eventually support any command type from the Core API which can actually be appended to the equivalent adapter native constructs.
Sync-Points --------------------------------------------------------------------------------- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ A sync-point is a value which represents a command inside of a command-buffer which is returned from command-buffer append function calls. These can be @@ -138,14 +140,61 @@ were obtained from. ${x}_exp_command_buffer_sync_point_t syncPoint; ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, - nullptr, &syncPoint); + nullptr, 0, nullptr, &syncPoint, nullptr, + nullptr); // Append a kernel launch with syncPoint as a dependency, ignore returned // sync-point ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, 1, &syncPoint, - nullptr, nullptr); + 0, nullptr, nullptr, nullptr, + nullptr); + +Command Synchronization With Events +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +When appending commands to a command-buffer an optional ``phEventWaitList`` +input parameter is available for passing a list of ${x}_event_handle_t objects +the command should wait on, as well as an optional ``phEvent`` output parameter +to get a ${x}_event_handle_t object that will be signaled on completion of the +command execution. It is the user's responsibility to release the returned +``phEvent`` with ${x}EventRelease. + +This returned signal event is only valid for the last execution of the command, +and prior to the first execution of the command-buffer it represents an empty +submission that is considered complete. When a user calls +${x}CommandBufferEnqueueExp all the signal events returned from the individual +commands in the command-buffer are synchronously reset to a non-complete state +prior to the asynchronous commands beginning.
+ +The wait event parameter allows commands in a command-buffer to depend on the +completion of UR commands submitted to a queue which are external +to a command-buffer, while the output signal event parameter allows individual +commands in a command-buffer to trigger external queue commands. Using returned +signal events as wait events inside the same command-buffer is also valid usage. + +It is possible for commands in different command-buffer objects to synchronize +using the event mechanism. This is only guaranteed to behave correctly in the +one-directional synchronization case, where the signal events of one +command-buffer's commands are used as wait events of another command-buffer's +commands. Such a relationship defines a permanent dependency between the +command-buffers which does not need to be updated using +:ref:`command event update` to preserve synchronization on future enqueues of +the command-buffer. + +Bi-directional sync between individual commands in two separate command-buffers +is however not guaranteed to behave correctly. This is due to the completion +state of the command events only being reset when a command-buffer is enqueued. +It is therefore possible for the first command-buffer enqueued to execute its +wait node that needs to have its event reset by the enqueue of the second +command-buffer, before the code path returns to user code for the user to +enqueue the second command-buffer, resulting in the first command-buffer's +wait node completing too early for the intended overall execution ordering. + +.. important:: + Support for using ``phEventWaitList`` & ``phEvent`` parameters requires a device + to support ${X}_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP. Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -162,14 +211,25 @@ enqueued or executed simultaneously, and submissions may be serialized.
${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, &executionEvent); + Updating Command-Buffer Commands -------------------------------------------------------------------------------- An adapter implementing the command-buffer experimental feature can optionally -support updating the configuration of kernel commands recorded to a -command-buffer. Support for this is reported by returning true in the +support updating the configuration of commands recorded to an already finalized +command-buffer. This device support is reported by the ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. +All update entry-points are synchronous and may block if the command-buffer is +executing when the entry-point is called. + +Kernel Argument Update +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Kernel commands can have the ND-Range & parameter arguments of the command +updated when a device supports +${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP. + Updating kernel commands is done by passing the new kernel configuration to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of the kernel command to update. Configurations that can be changed are the @@ -191,8 +251,9 @@ parameters to the kernel and the execution ND-Range. ${x}_exp_command_buffer_command_handle_t hCommand; ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 0, nullptr, - nullptr, &hCommand); + pLocalWorkSize, 0, nullptr, 0, + nullptr, nullptr, nullptr, + &hCommand); // Close the command-buffer before updating ${x}CommandBufferFinalizeExp(hCommandBuffer); @@ -237,6 +298,76 @@ parameters to the kernel and the execution ND-Range. 
// Perform the update ${x}CommandBufferUpdateKernelLaunchExp(hCommand, &update); +Command Event Update +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +Once a command-buffer has been finalized the wait-list parameter of the command +can be updated with ${x}CommandBufferUpdateWaitEventsExp. The number of wait +events for a command must stay consistent, therefore the number of events +passed to ${x}CommandBufferUpdateWaitEventsExp must be the same as when the +command was created. + +The ${x}CommandBufferUpdateSignalEventExp entry-points can be used to update +the signal event of a command. This returns a new event that will be signaled +on the next execution of the command in the command-buffer. It may be that +this is backed by the same native event object as the original signal event, +provided that the backend provides a way to reset or reuse events between +command-buffer executions. + +As ${x}_event_handle_t objects for queue submissions can only be signaled once, +and not reset, this update mechanism allows command synchronization to be +refreshed between command-buffer executions with regular command-queue events +that haven't yet been signaled. + +It is the users responsibility to release the returned ``phEvent`` with +${x}EventRelease. To update a command signal event with +${x}CommandBufferUpdateSignalEventExp there must also have been a non-null +``phEvent`` parameter passed on command creation. + +.. important:: + Support for updating ``phEventWaitList`` & ``phEvent`` parameters requires a device + to support both ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP and + ${X}_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP. + +.. parsed-literal:: + + // Create a command-buffer with update enabled. 
+ ${x}_exp_command_buffer_desc_t desc { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, + nullptr, + true // isUpdatable + }; + ${x}_exp_command_buffer_handle_t hCommandBuffer; + ${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer); + + // Append a kernel command with 2 events to wait on, and returning an + // event that will be signaled. + ${x}_event_handle_t hSignalEvent; + ${x}_event_handle_t hWaitEvents[2] = {...}; + ${x}_exp_command_buffer_command_handle_t hCommand; + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 0, nullptr, 2, + hWaitEvents, nullptr, &hSignalEvent, + &hCommand); + + // Close the command-buffer before updating + ${x}CommandBufferFinalizeExp(hCommandBuffer); + + // Enqueue command-buffer + ${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, nullptr); + + // Wait for command-buffer to finish + ${x}QueueFinish(hQueue); + + // Update signal event + ${x}_event_handle_t hNewSignalEvent; + ${x}CommandBufferUpdateSignalEventExp(hCommand, &hNewSignalEvent); + + // Update wait events to a new event + ${x}_event_handle_t hNewWaitEvents = ...; + {x}CommandBufferUpdateWaitEventsExp(hCommand, 1, &hNewWaitEvents); + API -------------------------------------------------------------------------------- @@ -250,6 +381,7 @@ Enums * ${x}_device_info_t * ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP * ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP + * ${X}_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP * ${x}_result_t * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -320,6 +452,8 @@ Functions * ${x}CommandBufferRetainCommandExp * ${x}CommandBufferReleaseCommandExp * ${x}CommandBufferUpdateKernelLaunchExp +* ${x}CommandBufferUpdateSignalEventExp +* ${x}CommandBufferUpdateWaitEventsExp * ${x}CommandBufferGetInfoExp * ${x}CommandBufferCommandGetInfoExp @@ -340,6 +474,8 @@ Changelog 
+-----------+-------------------------------------------------------+ | 1.4 | Add function definitions for kernel command update | +-----------+-------------------------------------------------------+ +| 1.5 | Command level synchronization with event objects | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 72b4e63f74..9a0c16dfff 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -23,7 +23,11 @@ etors: desc: "[$x_bool_t] Returns true if the device supports the use of command-buffers." - name: COMMAND_BUFFER_UPDATE_SUPPORT_EXP value: "0x1001" - desc: "[$x_bool_t] Returns true if the device supports updating the kernel commands in a command-buffer." + desc: "[$x_bool_t] Returns true if the device supports updating commands in a finalized command-buffer." + - name: COMMAND_BUFFER_EVENT_SUPPORT_EXP + value: "0x1002" + desc: "[$x_bool_t] Returns true if the device supports using event objects for command synchronization outside of a command-buffer." + --- #-------------------------------------------------------------------------- type: enum extend: true @@ -127,7 +131,7 @@ members: desc: "[in] Argument index." - type: "const ur_kernel_arg_mem_obj_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to memory object properties." + desc: "[in][optional] Pointer to memory object properties." - type: $x_mem_handle_t name: hNewMemObjArg desc: "[in][optional] Handle of memory object to set at argument index." @@ -142,7 +146,7 @@ members: desc: "[in] Argument index." - type: "const ur_kernel_arg_pointer_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to USM pointer properties." + desc: "[in][optional] Pointer to USM pointer properties." 
- type: "const void *" name: pNewPointerArg desc: "[in][optional] USM pointer to memory location holding the argument value to set at argument index." @@ -160,7 +164,7 @@ members: desc: "[in] Argument size." - type: "const ur_kernel_arg_value_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to value properties." + desc: "[in][optional] Pointer to value properties." - type: "const void *" name: pNewValueArg desc: "[in][optional] Argument value representing matching kernel arg type to set at argument index." @@ -314,9 +318,18 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." - type: "$x_exp_command_buffer_command_handle_t*" name: phCommand desc: "[out][optional] Handle to this command." @@ -330,6 +343,13 @@ returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -357,9 +377,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_SIZE: @@ -370,6 +402,13 @@ returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -400,9 +439,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_SIZE: @@ -415,6 +466,13 @@ returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -448,9 +506,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -458,6 +528,13 @@ returns: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -488,9 +565,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -498,6 +587,13 @@ returns: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -528,9 +624,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -538,6 +646,13 @@ returns: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -583,9 +698,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint desc: "[out][optional] Sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -593,6 +720,13 @@ returns: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -638,9 +772,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint desc: "[out][optional] Sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -648,6 +794,13 @@ returns: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -693,9 +846,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint desc: "[out][optional] Sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -703,6 +868,13 @@ returns: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -736,9 +908,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint desc: "[out][optional] sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -748,6 +932,13 @@ returns: - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "If `offset + size` results in an out-of-bounds access." + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -778,9 +969,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -791,6 +994,13 @@ returns: - $X_RESULT_ERROR_INVALID_SIZE: - "`size == 0`" - "If `size` is higher than the allocation size of `pMemory`" + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -822,9 +1032,21 @@ params: name: pSyncPointWaitList desc: "[in][optional] A list of sync points that this command depends on. May be ignored if command-buffer is in-order." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] sync point associated with this command." + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that will be signaled by the completion of this command in the next execution of the command-buffer." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -835,6 +1057,13 @@ returns: - $X_RESULT_ERROR_INVALID_SIZE: - "`size == 0`" - "If `size` is higher than the allocation size of `pMemory`" + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." 
+ - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- @@ -899,7 +1128,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY --- #-------------------------------------------------------------------------- type: function -desc: "Update a kernel launch command in a finalized command-buffer. This entry-point is synchronous and may block if the command-buffer is executing when the entry-point is called." +desc: "Update a kernel launch command in a finalized command-buffer." class: $xCommandBuffer name: UpdateKernelLaunchExp params: @@ -921,6 +1150,7 @@ returns: - "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size." - "If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - "If `hCommand` is not a kernel execution command." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE @@ -932,6 +1162,58 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function +desc: "Get a new event that will be signaled the next time the command in the command-buffer executes." +class: $xCommandBuffer +name: UpdateSignalEventExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer command to update." 
+ - type: "$x_event_handle_t*" + name: phSignalEvent + desc: "[out] Event to be signaled." +returns: + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If COMMAND_BUFFER_EVENT_SUPPORT_EXP is not supported by the device associated with `hCommand`." + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." + - "If the command-buffer `hCommand` belongs to has not been finalized." + - "If no `phEvent` parameter as set on creation of the command associated with `hCommand`." + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Set the list of wait events for a command to depend on to a list of new events." +class: $xCommandBuffer +name: UpdateWaitEventsExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer command to update." + - type: uint32_t + name: numEventsInWaitList + desc: "[in] Size of the event wait list." + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: "[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command execution. If nullptr, the numEventsInWaitList must be 0, indicating no wait events." +returns: + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If COMMAND_BUFFER_EVENT_SUPPORT_EXP is not supported by the device associated with `hCommand`." + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." + - "If the command-buffer `hCommand` belongs to has not been finalized." 
+ - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." + - "If `numEventsInWaitList` does not match the number of wait events set when the command associated with `hCommand` was created." + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function desc: "Get command-buffer object information." class: $xCommandBuffer name: GetInfoExp diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index ab59404bb4..f4ba983bfc 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -364,9 +364,6 @@ etors: - name: COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP desc: Enumerator for $xCommandBufferAppendKernelLaunchExp value: '125' -- name: COMMAND_BUFFER_ENQUEUE_EXP - desc: Enumerator for $xCommandBufferEnqueueExp - value: '128' - name: USM_PITCHED_ALLOC_EXP desc: Enumerator for $xUSMPitchedAllocExp value: '132' @@ -511,39 +508,6 @@ etors: - name: LOADER_TEAR_DOWN desc: Enumerator for $xLoaderTearDown value: '202' -- name: COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP - desc: Enumerator for $xCommandBufferAppendUSMMemcpyExp - value: '203' -- name: COMMAND_BUFFER_APPEND_USM_FILL_EXP - desc: Enumerator for $xCommandBufferAppendUSMFillExp - value: '204' -- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP - desc: Enumerator for $xCommandBufferAppendMemBufferCopyExp - value: '205' -- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP - desc: Enumerator for $xCommandBufferAppendMemBufferWriteExp - value: '206' -- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP - desc: Enumerator for $xCommandBufferAppendMemBufferReadExp - value: '207' -- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP - desc: Enumerator for 
$xCommandBufferAppendMemBufferCopyRectExp - value: '208' -- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP - desc: Enumerator for $xCommandBufferAppendMemBufferWriteRectExp - value: '209' -- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP - desc: Enumerator for $xCommandBufferAppendMemBufferReadRectExp - value: '210' -- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP - desc: Enumerator for $xCommandBufferAppendMemBufferFillExp - value: '211' -- name: COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP - desc: Enumerator for $xCommandBufferAppendUSMPrefetchExp - value: '212' -- name: COMMAND_BUFFER_APPEND_USM_ADVISE_EXP - desc: Enumerator for $xCommandBufferAppendUSMAdviseExp - value: '213' - name: ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP desc: Enumerator for $xEnqueueCooperativeKernelLaunchExp value: '214' @@ -595,9 +559,51 @@ etors: - name: BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP desc: Enumerator for $xBindlessImagesReleaseExternalMemoryExp value: '230' +- name: COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP + desc: Enumerator for $xCommandBufferAppendUSMMemcpyExp + value: '231' +- name: COMMAND_BUFFER_APPEND_USM_FILL_EXP + desc: Enumerator for $xCommandBufferAppendUSMFillExp + value: '232' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferCopyExp + value: '233' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferWriteExp + value: '234' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferReadExp + value: '235' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferCopyRectExp + value: '236' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferWriteRectExp + value: '237' +- name: COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferReadRectExp + value: '238' +- 
name: COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP + desc: Enumerator for $xCommandBufferAppendMemBufferFillExp + value: '239' +- name: COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP + desc: Enumerator for $xCommandBufferAppendUSMPrefetchExp + value: '240' +- name: COMMAND_BUFFER_APPEND_USM_ADVISE_EXP + desc: Enumerator for $xCommandBufferAppendUSMAdviseExp + value: '241' +- name: COMMAND_BUFFER_ENQUEUE_EXP + desc: Enumerator for $xCommandBufferEnqueueExp + value: '242' +- name: COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP + desc: Enumerator for $xCommandBufferUpdateSignalEventExp + value: '243' +- name: COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP + desc: Enumerator for $xCommandBufferUpdateWaitEventsExp + value: '244' - name: BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP desc: Enumerator for $xBindlessImagesMapExternalLinearMemoryExp - value: '231' + value: '245' --- type: enum desc: Defines structure types diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 2fdb6b08a3..36c29c4740 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -71,16 +71,42 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { } } -ur_exp_command_buffer_command_handle_t_:: - ur_exp_command_buffer_command_handle_t_( - ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, - const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t *LocalWorkSizePtr) - : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), - WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { - CommandBuffer->incrementInternalReferenceCount(); +std::unique_ptr +ur_exp_command_buffer_handle_t_::addSignalNode(CUgraphNode DepNode, + CUgraphNode &SignalNode) { + CUevent Event; + UR_CHECK_ERROR(cuEventCreate(&Event, CU_EVENT_DEFAULT)); + UR_CHECK_ERROR( + cuGraphAddEventRecordNode(&SignalNode, 
CudaGraph, &DepNode, 1, Event)); + return std::unique_ptr( + ur_event_handle_t_::makeWithNative(Context, Event)); +} + +ur_result_t ur_exp_command_buffer_handle_t_::addWaitNodes( + std::vector &DepsList, uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList) { + std::vector WaitNodes(NumEventsInWaitList); + for (uint32_t i = 0; i < NumEventsInWaitList; i++) { + CUevent Event = EventWaitList[i]->get(); + UR_CHECK_ERROR(cuGraphAddEventWaitNode( + &WaitNodes[i], CudaGraph, DepsList.data(), DepsList.size(), Event)); + } + // Set DepsLists as an output parameter for communicating the list of wait + // nodes created. + DepsList = WaitNodes; + return UR_RESULT_SUCCESS; +} + +kernel_command_handle::kernel_command_handle( + ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, + CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, + const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, + const size_t *LocalWorkSizePtr, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes), + Kernel(Kernel), Params(Params), WorkDim(WorkDim) { const size_t CopySize = sizeof(size_t) * WorkDim; std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize); std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize); @@ -96,6 +122,28 @@ ur_exp_command_buffer_command_handle_t_:: std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); } +}; + +ur_exp_command_buffer_command_handle_t_:: + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t CommandBuffer, CUgraphNode Node, + CUgraphNode SignalNode, std::vector WaitNodes) + : CommandBuffer(CommandBuffer), Node(Node), SignalNode(SignalNode), + WaitNodes(WaitNodes), RefCountInternal(1), RefCountExternal(1) { + CommandBuffer->incrementInternalReferenceCount(); +} + +ur_exp_command_buffer_command_handle_t_:: + 
~ur_exp_command_buffer_command_handle_t_() { + // We create the ur_event_t returned to the user for a signal node using + // `makeWithNative` which sets `HasOwnership` to false. Therefore destruction + // of the `ur_event_t` object doesn't free the underlying CuEvent_t object and + // we need to do it manually ourselves. + if (SignalNode) { + CUevent SignalEvent; + cuGraphEventRecordNodeGetEvent(SignalNode, &SignalEvent); + cuEventDestroy(SignalEvent); + } } /// Helper function for finding the Cuda Nodes associated with the @@ -154,17 +202,27 @@ static void setCopyParams(const void *SrcPtr, const CUmemorytype_enum SrcType, Params.Depth = 1; } -// Helper function for enqueuing memory fills +// Helper function for enqueuing memory fills. Templated on the CommandType +// enum class for the type of fill being created. +template static ur_result_t enqueueCommandBufferFillHelper( ur_exp_command_buffer_handle_t CommandBuffer, void *DstDevice, const CUmemorytype_enum DstType, const void *Pattern, size_t PatternSize, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *RetSyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *RetSyncPoint, + ur_event_handle_t *RetEvent, + ur_exp_command_buffer_command_handle_t *RetCommand) { std::vector DepsList; UR_CHECK_ERROR(getNodesFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList, DepsList)); + if (NumEventsInWaitList) { + UR_CHECK_ERROR(CommandBuffer->addWaitNodes(DepsList, NumEventsInWaitList, + EventWaitList)); + } + try { // Graph node added to graph, if multiple nodes are created this will // be set to the leaf node @@ -261,11 +319,28 @@ static ur_result_t enqueueCommandBufferFillHelper( } } + CUgraphNode SignalNode = nullptr; + if (RetEvent) { + auto SignalEvent = CommandBuffer->addSignalNode(GraphNode, SignalNode); + *RetEvent = 
SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = CommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = CommandBuffer->addSyncPoint(SyncPointNode); if (RetSyncPoint) { *RetSyncPoint = SyncPoint; } + + std::vector WaitNodes = + NumEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new T(CommandBuffer, GraphNode, SignalNode, WaitNodes); + CommandBuffer->CommandHandles.push_back(NewCommand); + + if (RetCommand) { + NewCommand->incrementInternalReferenceCount(); + *RetCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -346,7 +421,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, ur_exp_command_buffer_command_handle_t *phCommand) { // Preconditions UR_ASSERT(hCommandBuffer->Context == hKernel->getContext(), @@ -355,19 +431,45 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); CUgraphNode GraphNode; - std::vector DepsList; UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + if (*pGlobalWorkSize == 0) { try { // Create an empty node if the kernel workload size is zero - UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph, - DepsList.data(), DepsList.size())); + if (!phEvent) { + UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, + 
hCommandBuffer->CudaGraph, + DepsList.data(), DepsList.size())); + } else { + CUevent Event = nullptr; + UR_CHECK_ERROR(cuEventCreate(&Event, CU_EVENT_DEFAULT)); + UR_CHECK_ERROR( + cuGraphAddEventRecordNode(&GraphNode, hCommandBuffer->CudaGraph, + DepsList.data(), DepsList.size(), Event)); + + auto RetEventUP = std::unique_ptr( + ur_event_handle_t_::makeWithNative(hCommandBuffer->Context, Event)); + + *phEvent = RetEventUP.release(); + } + + // Add signal node if external return event is used. + CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } @@ -411,23 +513,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( if (LocalSize != 0) hKernel->clearLocalSize(); + // Add signal node if external return event is used. + CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } - auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ - hCommandBuffer, hKernel, GraphNode, NodeParams, - workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; - - NewCommand->incrementInternalReferenceCount(); + std::vector WaitNodes = + numEventsInWaitList ? 
DepsList : std::vector(); + auto NewCommand = new kernel_command_handle( + hCommandBuffer, hKernel, GraphNode, NodeParams, workDim, + pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, SignalNode, + WaitNodes); hCommandBuffer->CommandHandles.push_back(NewCommand); if (phCommand) { + NewCommand->incrementInternalReferenceCount(); *phCommand = NewCommand; } - } catch (ur_result_t Err) { return Err; } @@ -438,12 +549,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { CUgraphNode GraphNode; std::vector DepsList; UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { CUDA_MEMCPY3D NodeParams = {}; setCopyParams(pSrc, CU_MEMORYTYPE_HOST, pDst, CU_MEMORYTYPE_HOST, size, @@ -453,11 +571,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( &GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParams, hCommandBuffer->Device->getNativeContext())); + // Add signal node if external return event is used. + CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? 
SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + + std::vector WaitNodes = + numEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new usm_memcpy_command_handle(hCommandBuffer, GraphNode, + SignalNode, WaitNodes); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -469,7 +606,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { CUgraphNode GraphNode; std::vector DepsList; @@ -481,6 +620,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { auto Src = std::get(hSrcMem->Mem) .getPtrWithOffset(hCommandBuffer->Device, srcOffset); @@ -495,11 +639,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( &GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParams, hCommandBuffer->Device->getNativeContext())); + // Add signal node if external return event is used. 
+ CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + + std::vector WaitNodes = + numEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new buffer_copy_command_handle(hCommandBuffer, GraphNode, + SignalNode, WaitNodes); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -513,12 +675,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { CUgraphNode GraphNode; std::vector DepsList; UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { auto SrcPtr = std::get(hSrcMem->Mem).getPtr(hCommandBuffer->Device); @@ -534,11 +703,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( &GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParams, hCommandBuffer->Device->getNativeContext())); + // Add signal node if external return event is used. 
+ CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + + std::vector WaitNodes = + numEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new buffer_copy_rect_command_handle( + hCommandBuffer, GraphNode, SignalNode, WaitNodes); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -551,12 +739,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( size_t offset, size_t size, const void *pSrc, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { CUgraphNode GraphNode; std::vector DepsList; UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { auto Dst = std::get(hBuffer->Mem) .getPtrWithOffset(hCommandBuffer->Device, offset); @@ -569,11 +764,30 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( &GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParams, hCommandBuffer->Device->getNativeContext())); + // Add signal node if external return event is used. 
+ CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + + std::vector WaitNodes = + numEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new buffer_write_command_handle(hCommandBuffer, GraphNode, + SignalNode, WaitNodes); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -585,12 +799,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { CUgraphNode GraphNode; std::vector DepsList; UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { auto Src = std::get(hBuffer->Mem) .getPtrWithOffset(hCommandBuffer->Device, offset); @@ -603,11 +824,30 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( &GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParams, hCommandBuffer->Device->getNativeContext())); + // Add 
signal node if external return event is used. + CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + + std::vector WaitNodes = + numEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new buffer_read_command_handle(hCommandBuffer, GraphNode, + SignalNode, WaitNodes); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -622,12 +862,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { CUgraphNode GraphNode; std::vector DepsList; UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { auto DstPtr = std::get(hBuffer->Mem).getPtr(hCommandBuffer->Device); @@ -642,11 +889,30 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( &GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParams, hCommandBuffer->Device->getNativeContext())); + // Add signal node if 
external return event is used. + CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + + std::vector WaitNodes = + numEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new buffer_write_rect_command_handle( + hCommandBuffer, GraphNode, SignalNode, WaitNodes); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -661,12 +927,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( size_t hostRowPitch, size_t hostSlicePitch, void *pDst, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { CUgraphNode GraphNode; std::vector DepsList; UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { auto SrcPtr = std::get(hBuffer->Mem).getPtr(hCommandBuffer->Device); @@ -681,11 +954,30 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( &GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size(), &NodeParams, hCommandBuffer->Device->getNativeContext())); + // Add signal node if external return 
event is used. + CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + + std::vector WaitNodes = + numEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new buffer_read_rect_command_handle( + hCommandBuffer, GraphNode, SignalNode, WaitNodes); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -697,7 +989,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( size_t /*Size*/, ur_usm_migration_flags_t /*Flags*/, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { // Prefetch cmd is not supported by Cuda Graph. // We implement it as an empty node to enforce dependencies. CUgraphNode GraphNode; @@ -706,17 +1000,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { // Add an empty node to preserve dependencies. 
UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size())); + // Add signal node if external return event is used. + CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + std::vector WaitNodes = + numEventsInWaitList ? DepsList : std::vector(); + auto NewCommand = new usm_prefetch_command_handle(hCommandBuffer, GraphNode, + SignalNode, WaitNodes); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -728,7 +1045,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( size_t /*Size*/, ur_usm_advice_flags_t /*Advice*/, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { // Mem-Advise cmd is not supported by Cuda Graph. // We implement it as an empty node to enforce dependencies. 
CUgraphNode GraphNode; @@ -737,16 +1056,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + if (numEventsInWaitList) { + UR_CHECK_ERROR(hCommandBuffer->addWaitNodes(DepsList, numEventsInWaitList, + phEventWaitList)); + } + try { // Add an empty node to preserve dependencies. UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph, DepsList.data(), DepsList.size())); + // Add signal node if external return event is used. + CUgraphNode SignalNode = nullptr; + if (phEvent) { + auto SignalEvent = hCommandBuffer->addSignalNode(GraphNode, SignalNode); + *phEvent = SignalEvent.release(); + } + // Get sync point and register the cuNode with it. - auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + CUgraphNode SyncPointNode = SignalNode ? SignalNode : GraphNode; + auto SyncPoint = hCommandBuffer->addSyncPoint(SyncPointNode); if (pSyncPoint) { *pSyncPoint = SyncPoint; } + + std::vector WaitNodes = + numEventsInWaitList ? 
DepsList : std::vector(); + auto NewCommand = new usm_advise_command_handle(hCommandBuffer, GraphNode, + SignalNode, WaitNodes); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + NewCommand->incrementInternalReferenceCount(); + *phCommand = NewCommand; + } } catch (ur_result_t Err) { return Err; } @@ -759,7 +1102,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const void *pPattern, size_t patternSize, size_t offset, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { auto ArgsAreMultiplesOfPatternSize = (offset % patternSize == 0) || (size % patternSize == 0); @@ -774,9 +1119,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( auto DstDevice = std::get(hBuffer->Mem) .getPtrWithOffset(hCommandBuffer->Device, offset); - return enqueueCommandBufferFillHelper( + return enqueueCommandBufferFillHelper( hCommandBuffer, &DstDevice, CU_MEMORYTYPE_DEVICE, pPattern, patternSize, - size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + size, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( @@ -784,17 +1130,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const void *pPattern, size_t patternSize, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, 
+ ur_exp_command_buffer_command_handle_t *phCommand) { auto PatternIsValid = (pPattern != nullptr); auto PatternSizeIsValid = ((patternSize & (patternSize - 1)) == 0) && (patternSize > 0); // is a positive power of two UR_ASSERT(PatternIsValid && PatternSizeIsValid, UR_RESULT_ERROR_INVALID_SIZE); - return enqueueCommandBufferFillHelper( + return enqueueCommandBufferFillHelper( hCommandBuffer, pPtr, CU_MEMORYTYPE_UNIFIED, pPattern, patternSize, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -862,9 +1210,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } + if (hCommand->getCommandType() != CommandType::Kernel) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + auto CommandHandle = static_cast(hCommand); + if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { // Error if work dim changes - if (NewWorkDim != hCommand->WorkDim) { + if (NewWorkDim != CommandHandle->WorkDim) { return UR_RESULT_ERROR_INVALID_OPERATION; } @@ -878,7 +1232,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( // or if local size nullptr and created with non-null const bool IsNewLocalSizeNull = pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = hCommand->isNullLocalSize(); + const bool IsOriginalLocalSizeNull = CommandHandle->isNullLocalSize(); if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { return UR_RESULT_ERROR_INVALID_OPERATION; @@ -886,7 +1240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } // Kernel corresponding to the command to update - ur_kernel_handle_t Kernel = hCommand->Kernel; + ur_kernel_handle_t Kernel = CommandHandle->Kernel; // Update pointer arguments to the kernel uint32_t NumPointerArgs = 
pUpdateKernelLaunch->numNewPointerArgs; @@ -955,29 +1309,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( if (NewWorkDim != 0) { UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - hCommand->WorkDim = NewWorkDim; + CommandHandle->WorkDim = NewWorkDim; } if (pUpdateKernelLaunch->pNewGlobalWorkOffset) { - hCommand->setGlobalOffset(pUpdateKernelLaunch->pNewGlobalWorkOffset); + CommandHandle->setGlobalOffset(pUpdateKernelLaunch->pNewGlobalWorkOffset); } if (pUpdateKernelLaunch->pNewGlobalWorkSize) { - hCommand->setGlobalSize(pUpdateKernelLaunch->pNewGlobalWorkSize); + CommandHandle->setGlobalSize(pUpdateKernelLaunch->pNewGlobalWorkSize); } if (pUpdateKernelLaunch->pNewLocalWorkSize) { - hCommand->setLocalSize(pUpdateKernelLaunch->pNewLocalWorkSize); + CommandHandle->setLocalSize(pUpdateKernelLaunch->pNewLocalWorkSize); } - size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; - size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + size_t *GlobalWorkOffset = CommandHandle->GlobalWorkOffset; + size_t *GlobalWorkSize = CommandHandle->GlobalWorkSize; // If no worksize is provided make sure we pass nullptr to setKernelParams so // it can guess the local work size. - const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); - size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; - uint32_t WorkDim = hCommand->WorkDim; + const bool ProvidedLocalSize = !CommandHandle->isNullLocalSize(); + size_t *LocalWorkSize = + ProvidedLocalSize ? 
CommandHandle->LocalWorkSize : nullptr; + uint32_t WorkDim = CommandHandle->WorkDim; // Set the number of threads per block to the number of threads per warp // by default unless user has provided a better number @@ -993,7 +1348,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return Result; } - CUDA_KERNEL_NODE_PARAMS &Params = hCommand->Params; + CUDA_KERNEL_NODE_PARAMS &Params = CommandHandle->Params; Params.func = CuFunc; Params.gridDimX = BlocksPerGrid[0]; @@ -1005,12 +1360,78 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( Params.sharedMemBytes = Kernel->getLocalSize(); Params.kernelParams = const_cast(Kernel->getArgIndices().data()); - CUgraphNode Node = hCommand->Node; + CUgraphNode Node = CommandHandle->Node; CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec; UR_CHECK_ERROR(cuGraphExecKernelNodeSetParams(CudaGraphExec, Node, &Params)); return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t hCommand, + ur_event_handle_t *phEvent) { + ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; + + // Update requires command-buffer to be finalized + if (!CommandBuffer->CudaGraphExec) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Update requires command-buffer to be created with update enabled + if (!CommandBuffer->IsUpdatable) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error to try update the signal event, when a signal event wasn't set on + // creation + CUgraphNode SignalNode = hCommand->SignalNode; + if (phEvent != nullptr && SignalNode == nullptr) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + CUevent SignalEvent; + UR_CHECK_ERROR(cuGraphEventRecordNodeGetEvent(SignalNode, &SignalEvent)); + + if (phEvent) { + *phEvent = std::unique_ptr( + ur_event_handle_t_::makeWithNative(CommandBuffer->Context, + SignalEvent)) + .release(); + } + + return UR_RESULT_SUCCESS; 
+} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t hCommand, + uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList) { + ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; + + // Update requires command-buffer to be finalized + if (!CommandBuffer->CudaGraphExec) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Update requires command-buffer to be created with update enabled + if (!CommandBuffer->IsUpdatable) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Error if number of wait nodes is not the same as when node was created + std::vector &WaitNodes = hCommand->WaitNodes; + if (NumEventsInWaitList != WaitNodes.size()) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec; + for (uint32_t i = 0; i < NumEventsInWaitList; i++) { + ur_event_handle_t WaitEvent = phEventWaitList[i]; + UR_CHECK_ERROR(cuGraphExecEventWaitNodeSetEvent(CudaGraphExec, WaitNodes[i], + WaitEvent->get())); + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 504095612b..9dc3d1c060 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -34,17 +34,78 @@ logger::always("UR <--- {}({})", #Call, Result); \ } -// Handle to a kernel command. -// -// Struct that stores all the information related to a kernel command in a -// command-buffer, such that the command can be recreated. When handles can -// be returned from other command types this struct will need refactored. 
+enum class CommandType { + Kernel, + USMMemcpy, + USMFill, + MemBufferCopy, + MemBufferCopyRect, + MemBufferRead, + MemBufferReadRect, + MemBufferWrite, + MemBufferWriteRect, + MemBufferFill, + USMPrefetch, + USMAdvise +}; + +// Command handle that can be returned from command append entry-points. +// Implemented as an abstract base class that handles for the specific +// command types derive from. struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_command_handle_t_( - ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, - const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t *LocalWorkSizePtr); + ur_exp_command_buffer_handle_t CommandBuffer, CUgraphNode Node, + CUgraphNode SignalNode, std::vector WaitNodes); + + virtual ~ur_exp_command_buffer_command_handle_t_(); + + virtual CommandType getCommandType() const noexcept = 0; + + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } + + // Parent UR command-buffer. + ur_exp_command_buffer_handle_t CommandBuffer; + // Node created in graph for the command. + CUgraphNode Node; + // An optional EventRecordNode that's a successor of Node to signal + // dependent commands outwith the command-buffer. + CUgraphNode SignalNode; + // Optional list of EventWait Nodes to wait on commands from outside of the + // command-buffer. 
+ std::vector WaitNodes; + +private: + std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; +}; + +struct kernel_command_handle : ur_exp_command_buffer_command_handle_t_ { + kernel_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + ur_kernel_handle_t Kernel, CUgraphNode Node, + CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, + const size_t *GlobalWorkOffsetPtr, + const size_t *GlobalWorkSizePtr, + const size_t *LocalWorkSizePtr, CUgraphNode SignalNode, + std::vector WaitNodes); + + CommandType getCommandType() const noexcept override { + return CommandType::Kernel; + } void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) { const size_t CopySize = sizeof(size_t) * WorkDim; @@ -78,36 +139,137 @@ struct ur_exp_command_buffer_command_handle_t_ { return 0 == std::memcmp(LocalWorkSize, Zeros, sizeof(LocalWorkSize)); } - uint32_t incrementInternalReferenceCount() noexcept { - return ++RefCountInternal; + ur_kernel_handle_t Kernel; + CUDA_KERNEL_NODE_PARAMS Params; + + uint32_t WorkDim; + size_t GlobalWorkOffset[3]; + size_t GlobalWorkSize[3]; + size_t LocalWorkSize[3]; +}; + +struct usm_memcpy_command_handle : ur_exp_command_buffer_command_handle_t_ { + usm_memcpy_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::USMMemcpy; } - uint32_t decrementInternalReferenceCount() noexcept { - return --RefCountInternal; +}; + +struct usm_fill_command_handle : ur_exp_command_buffer_command_handle_t_ { + usm_fill_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + 
return CommandType::USMFill; } +}; - uint32_t incrementExternalReferenceCount() noexcept { - return ++RefCountExternal; +struct buffer_copy_command_handle : ur_exp_command_buffer_command_handle_t_ { + buffer_copy_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::MemBufferCopy; } - uint32_t decrementExternalReferenceCount() noexcept { - return --RefCountExternal; +}; + +struct buffer_copy_rect_command_handle + : ur_exp_command_buffer_command_handle_t_ { + buffer_copy_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::MemBufferCopyRect; } - uint32_t getExternalReferenceCount() const noexcept { - return RefCountExternal; +}; + +struct buffer_read_command_handle : ur_exp_command_buffer_command_handle_t_ { + buffer_read_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::MemBufferRead; } +}; - ur_exp_command_buffer_handle_t CommandBuffer; - ur_kernel_handle_t Kernel; - CUgraphNode Node; - CUDA_KERNEL_NODE_PARAMS Params; +struct buffer_read_rect_command_handle + : ur_exp_command_buffer_command_handle_t_ { + buffer_read_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, 
+ WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::MemBufferReadRect; + } +}; - uint32_t WorkDim; - size_t GlobalWorkOffset[3]; - size_t GlobalWorkSize[3]; - size_t LocalWorkSize[3]; +struct buffer_write_command_handle : ur_exp_command_buffer_command_handle_t_ { + buffer_write_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::MemBufferWrite; + } +}; -private: - std::atomic_uint32_t RefCountInternal; - std::atomic_uint32_t RefCountExternal; +struct buffer_write_rect_command_handle + : ur_exp_command_buffer_command_handle_t_ { + buffer_write_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::MemBufferWriteRect; + } +}; + +struct buffer_fill_command_handle : ur_exp_command_buffer_command_handle_t_ { + buffer_fill_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::MemBufferFill; + } +}; + +struct usm_prefetch_command_handle : ur_exp_command_buffer_command_handle_t_ { + usm_prefetch_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return 
CommandType::USMPrefetch; + } +}; + +struct usm_advise_command_handle : ur_exp_command_buffer_command_handle_t_ { + usm_advise_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + std::vector WaitNodes) + : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, + WaitNodes) {} + CommandType getCommandType() const noexcept override { + return CommandType::USMAdvise; + } }; struct ur_exp_command_buffer_handle_t_ { @@ -115,7 +277,7 @@ struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device, bool IsUpdatable); - ~ur_exp_command_buffer_handle_t_(); + virtual ~ur_exp_command_buffer_handle_t_(); void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, CUgraphNode CuNode) { @@ -127,6 +289,24 @@ struct ur_exp_command_buffer_handle_t_ { return NextSyncPoint; } + // Creates a cuEvent object and adds a cuGraphAddEventRecordNode node to the + // graph. + // @param[in] DepNode Node for the EventRecord node to depend on. + // @param[out] SignalNode Node created by cuGraphAddEventRecordNode. + // @return UR event backed by CuEvent object that will be recorded to. + std::unique_ptr addSignalNode(CUgraphNode DepNode, + CUgraphNode &SignalNode); + + // Adds a cuGraphAddEventWaitNodes node to the graph + // @param[in,out] Dependencies for each of the wait nodes created. Set to the + // list of wait nodes created on success. + // @param[in] NumEventsInWaitList Number of wait nodes to create. + // @param[in] UR events wrapping the cuEvent objects the nodes will wait on. 
+ // @returns UR_RESULT_SUCCESS or an error + ur_result_t addWaitNodes(std::vector &DepsList, + uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList); + // Helper to register next sync point // @param CuNode Node to register as next sync point // @return Pointer to the sync that registers the Node diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index bbaaa27cdb..441e31bf58 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1094,6 +1094,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: return ReturnValue(true); case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: { int Value = getAttribute(hDevice, diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index bb3fb9aee5..00b32395e7 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -301,7 +301,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; - + pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; + pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; return retVal; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 4ff38626af..d659f8eedd 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -314,8 +314,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const 
ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; // Preconditions UR_ASSERT(hCommandBuffer->Context == hKernel->getContext(), UR_RESULT_ERROR_INVALID_KERNEL); @@ -408,7 +412,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; hipGraphNode_t GraphNode; std::vector DepsList; @@ -439,7 +449,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; hipGraphNode_t GraphNode; std::vector DepsList; @@ -481,7 +497,13 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendMemBufferCopyRectExp( size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; hipGraphNode_t GraphNode; std::vector DepsList; @@ -523,7 +545,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( size_t offset, size_t size, const void *pSrc, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; hipGraphNode_t GraphNode; std::vector DepsList; @@ -557,7 +585,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; hipGraphNode_t GraphNode; std::vector 
DepsList; @@ -594,7 +628,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; hipGraphNode_t GraphNode; std::vector DepsList; @@ -636,7 +676,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( size_t hostRowPitch, size_t hostSlicePitch, void *pDst, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; hipGraphNode_t GraphNode; std::vector DepsList; @@ -675,7 +721,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( size_t /*Size*/, ur_usm_migration_flags_t /*Flags*/, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; // Prefetch cmd is not 
supported by Hip Graph. // We implement it as an empty node to enforce dependencies. hipGraphNode_t GraphNode; @@ -708,7 +760,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( size_t /*Size*/, ur_usm_advice_flags_t /*Advice*/, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; // Mem-Advise cmd is not supported by Hip Graph. // We implement it as an empty node to enforce dependencies. hipGraphNode_t GraphNode; @@ -741,7 +799,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const void *pPattern, size_t patternSize, size_t offset, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; auto ArgsAreMultiplesOfPatternSize = (offset % patternSize == 0) || (size % patternSize == 0); @@ -768,7 +832,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const void *pPattern, size_t patternSize, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + 
ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + std::ignore = phCommand; auto PatternIsValid = (pPattern != nullptr); @@ -984,6 +1054,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t hCommand, + ur_event_handle_t *phEvent) { + std::ignore = hCommand; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t hCommand, + uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList) { + std::ignore = hCommand; + std::ignore = NumEventsInWaitList; + std::ignore = phEventWaitList; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 3ae98e929d..07049a3ec5 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -917,6 +917,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); } + case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: + return ReturnValue(false); default: break; } diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index af9b8fa9c3..1454ddfdf1 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -298,6 +298,8 @@ UR_DLLEXPORT 
ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; + pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; + pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; return retVal; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index e507730888..5ca0efdbd3 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -628,7 +628,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { } namespace { - /** * Sets the global offset for a kernel command that will be appended to the * command buffer. @@ -738,8 +737,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *GlobalWorkSize, const size_t *LocalWorkSize, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *RetSyncPoint, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *RetSyncPoint, ur_event_handle_t *Event, ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + UR_ASSERT(Kernel->Program, UR_RESULT_ERROR_INVALID_NULL_POINTER); // Lock automatically releases when this goes out of scope. 
@@ -794,7 +798,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Dst, const void *Src, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; bool PreferCopyEngine = !IsDevicePointer(CommandBuffer->Context, Src) || !IsDevicePointer(CommandBuffer->Context, Dst); @@ -817,7 +827,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_mem_handle_t DstMem, size_t SrcOffset, size_t DstOffset, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; auto SrcBuffer = ur_cast<_ur_buffer *>(SrcMem); auto DstBuffer = ur_cast<_ur_buffer *>(DstMem); @@ -849,7 +865,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + 
std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; auto SrcBuffer = ur_cast<_ur_buffer *>(SrcMem); auto DstBuffer = ur_cast<_ur_buffer *>(DstMem); @@ -880,7 +902,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( size_t Offset, size_t Size, const void *Src, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; std::scoped_lock Lock(Buffer->Mutex); char *ZeHandleDst = nullptr; @@ -904,7 +932,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( size_t HostRowPitch, size_t HostSlicePitch, void *Src, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; std::scoped_lock Lock(Buffer->Mutex); char *ZeHandleDst = nullptr; @@ -926,7 +960,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, void *Dst, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + 
ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; std::scoped_lock SrcLock(Buffer->Mutex); char *ZeHandleSrc = nullptr; @@ -949,7 +989,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( size_t HostRowPitch, size_t HostSlicePitch, void *Dst, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; std::scoped_lock SrcLock(Buffer->Mutex); char *ZeHandleSrc; @@ -970,7 +1016,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *RetSyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *RetSyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; std::ignore = Flags; if (CommandBuffer->IsInOrderCmdList) { @@ -1009,7 +1061,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t 
*SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *RetSyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *RetSyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; // A memory chunk can be advised with muliple memory advices // We therefore prefer if statements to switch cases to combine all potential // flags @@ -1072,7 +1130,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const void *Pattern, size_t PatternSize, size_t Offset, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; std::scoped_lock Lock(Buffer->Mutex); @@ -1093,7 +1157,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const void *Pattern, size_t PatternSize, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_exp_command_buffer_sync_point_t *SyncPoint, ur_event_handle_t *Event, + ur_exp_command_buffer_command_handle_t *Command) { + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + std::ignore = Event; + std::ignore = Command; return enqueueCommandBufferFillHelper( UR_COMMAND_MEM_BUFFER_FILL, CommandBuffer, Ptr, @@ -1653,6 +1723,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( 
return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t Command, ur_event_handle_t *Event) { + std::ignore = Command; + std::ignore = Event; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t Command, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { + std::ignore = Command; + std::ignore = NumEventsInWaitList; + std::ignore = EventWaitList; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 08f13268eb..ef6365f1ca 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -949,13 +949,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: return ReturnValue(true); case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - // Update support requires being able to update kernel arguments and all - // aspects of the kernel NDRange. + // Update support requires being able to update kernel arguments, all + // aspects of the kernel NDRange, as well as signal & wait events. 
const ze_mutable_command_exp_flags_t UpdateMask = ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS | ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | - ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; + ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET | + ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT | + ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS; const bool KernelArgUpdateSupport = (Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags & @@ -963,6 +965,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(KernelArgUpdateSupport && Device->Platform->ZeMutableCmdListExt.Supported); } + case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: + return ReturnValue(false); case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { // On L0 bindless images are supported. return ReturnValue(true); diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 8941f756ea..fe85f035e5 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -348,6 +348,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; + pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; + pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; return retVal; } diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 20d9cc5bed..103e4bb644 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -8355,8 +8355,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ) try { @@ -8371,7 +8380,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pLocalWorkSize, &numSyncPointsInWaitList, &pSyncPointWaitList, + &numEventsInWaitList, + &phEventWaitList, &pSyncPoint, + &phEvent, &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( @@ -8391,6 +8403,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } // optional output handle if (phCommand) { *phCommand = mock::createDummyHandle< @@ -8428,14 +8444,34 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution.
If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; ur_command_buffer_append_usm_memcpy_exp_params_t params = { - &hCommandBuffer, &pDst, &pSrc, &size, &numSyncPointsInWaitList, - &pSyncPointWaitList, &pSyncPoint}; + &hCommandBuffer, + &pDst, + &pSrc, + &size, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -8454,6 +8490,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -8488,15 +8533,35 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution.
If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; ur_command_buffer_append_usm_fill_exp_params_t params = { - &hCommandBuffer, &pMemory, &pPattern, - &patternSize, &size, &numSyncPointsInWaitList, - &pSyncPointWaitList, &pSyncPoint}; + &hCommandBuffer, + &pMemory, + &pPattern, + &patternSize, + &size, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -8515,6 +8580,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -8549,8 +8623,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -8563,7 +8648,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( &size, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -8582,6 +8671,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -8616,8 +8714,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -8629,7 +8738,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( &pSrc, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -8648,6 +8761,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -8681,8 +8803,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -8694,7 +8827,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( &pDst, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -8713,6 +8850,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -8754,8 +8900,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -8772,7 +8929,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( &dstSlicePitch, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -8791,6 +8952,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -8838,8 +9008,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -8856,7 +9037,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( &pSrc, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -8875,6 +9060,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -8920,8 +9114,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -8938,7 +9143,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( &pDst, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -8957,6 +9166,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -8992,8 +9210,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -9006,7 +9235,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( &size, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -9025,6 +9258,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -9057,8 +9299,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -9069,7 +9322,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( &flags, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -9088,6 +9345,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -9120,8 +9386,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -9132,7 +9409,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( &advice, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( mock::getCallbacks().get_before_callback( @@ -9151,6 +9432,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( result = replaceCallback(&params); } else { + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + // optional output handle + if (phCommand) { + *phCommand = mock::createDummyHandle< + ur_exp_command_buffer_command_handle_t>(); + } result = UR_RESULT_SUCCESS; } @@ -9372,6 +9662,107 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateSignalEventExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to
update. + ur_event_handle_t *phSignalEvent ///< [out] Event to be signaled. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_command_buffer_update_signal_event_exp_params_t params = { + &hCommand, &phSignalEvent}; + + auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_before_callback( + "urCommandBufferUpdateSignalEventExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_replace_callback( + "urCommandBufferUpdateSignalEventExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + *phSignalEvent = mock::createDummyHandle<ur_event_handle_t>(); + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_after_callback( + "urCommandBufferUpdateSignalEventExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateWaitEventsExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events.
+ ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_command_buffer_update_wait_events_exp_params_t params = { + &hCommand, &numEventsInWaitList, &phEventWaitList}; + + auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_before_callback( + "urCommandBufferUpdateWaitEventsExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_replace_callback( + "urCommandBufferUpdateWaitEventsExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_after_callback( + "urCommandBufferUpdateWaitEventsExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferGetInfoExp __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( @@ -10424,6 +10815,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnUpdateKernelLaunchExp = driver::urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnUpdateSignalEventExp = + driver::urCommandBufferUpdateSignalEventExp; + + pDdiTable->pfnUpdateWaitEventsExp = + driver::urCommandBufferUpdateWaitEventsExp; + pDdiTable->pfnGetInfoExp = driver::urCommandBufferGetInfoExp; pDdiTable->pfnCommandGetInfoExp = driver::urCommandBufferCommandGetInfoExp; diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index fde6c03b86..50d7dc2687 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -49,9 +49,9 @@
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, - const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_command_handle_t *) { + const ur_exp_command_buffer_sync_point_t *, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -59,8 +59,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t, void *, const void *, size_t, uint32_t, - const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + const ur_exp_command_buffer_sync_point_t *, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -69,7 +70,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, size_t, size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -79,7 +81,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -89,7 +92,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, const void *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -98,8 +102,9 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, size_t, size_t, void *, - uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_exp_command_buffer_sync_point_t *, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -109,8 +114,9 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *, - uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_exp_command_buffer_sync_point_t *, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -120,8 +126,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, ur_rect_offset_t, ur_rect_offset_t, ur_rect_region_t, size_t, size_t, size_t, size_t, void *, - uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_exp_command_buffer_sync_point_t *, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -138,29 +145,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t, ur_mem_handle_t, const void *, size_t, size_t, size_t, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t, void *, const void *, size_t, size_t, - 
uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_exp_command_buffer_sync_point_t *, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t, const void *, size_t, ur_usm_migration_flags_t, uint32_t, - const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + const ur_exp_command_buffer_sync_point_t *, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t, const void *, size_t, ur_usm_advice_flags_t, - uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + uint32_t, const ur_exp_command_buffer_sync_point_t *, uint32_t, + const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, + ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -180,6 +191,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferUpdateWaitEventsExp(ur_exp_command_buffer_command_handle_t, + uint32_t, const ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( ur_exp_command_buffer_handle_t, ur_exp_command_buffer_info_t, size_t, void *, 
size_t *) { diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index c5652398e3..aafb577ef1 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -389,6 +389,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: return ReturnValue(false); case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index ff6c9d8c0f..94c6c4a03e 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -290,6 +290,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; + pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; + pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; return retVal; } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 5698f36928..50b8e80df0 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -142,8 +142,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, 
ur_exp_command_buffer_command_handle_t *phCommandHandle) { + (void)numEventsInWaitList; + (void)phEventWaitList; + (void)phEvent; cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -192,7 +196,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, + [[maybe_unused]] ur_event_handle_t *phEvent, + [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -203,7 +211,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, + [[maybe_unused]] ur_event_handle_t *phEvent, + [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -212,8 +224,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { - + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t
*phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { + (void)numEventsInWaitList; + (void)phEventWaitList; + (void)phEvent; + (void)phCommand; cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext); cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; UR_RETURN_ON_FAILURE( @@ -242,7 +259,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, + [[maybe_unused]] ur_event_handle_t *phEvent, + [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { size_t OpenCLOriginRect[3]{srcOrigin.x, srcOrigin.y, srcOrigin.z}; size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; @@ -273,7 +294,11 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, + [[maybe_unused]] ur_event_handle_t *phEvent, + [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { cl_adapter::die("Experimental Command-buffer feature is not " "implemented for OpenCL adapter."); @@ -288,7 +313,11 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { +
[[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, + [[maybe_unused]] ur_event_handle_t *phEvent, + [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -306,7 +335,11 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, + [[maybe_unused]] ur_event_handle_t *phEvent, + [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -324,7 +357,11 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint, + [[maybe_unused]] ur_event_handle_t *phEvent, + [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -333,7 +370,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const void *pPattern, size_t patternSize, size_t offset, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const 
ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, + [[maybe_unused]] ur_event_handle_t *phEvent, + [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext); cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; @@ -354,14 +395,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *mem, size_t size, ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { (void)hCommandBuffer; (void)mem; (void)size; (void)flags; (void)numSyncPointsInWaitList; (void)pSyncPointWaitList; + (void)numEventsInWaitList; + (void)phEventWaitList; (void)pSyncPoint; + (void)phEvent; + (void)phCommand; // Not implemented return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -371,14 +418,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *mem, size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) { (void)hCommandBuffer; (void)mem; (void)size; (void)advice; (void)numSyncPointsInWaitList; (void)pSyncPointWaitList; + (void)numEventsInWaitList; + (void)phEventWaitList; (void)pSyncPoint; + (void)phEvent; + (void)phCommand; // Not implemented return
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -578,6 +631,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t Command, + [[maybe_unused]] ur_event_handle_t *Event) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t Command, + [[maybe_unused]] uint32_t NumEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *EventWaitList) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index a31d6580a0..cc6a98e149 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1031,6 +1031,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, deviceSupportsURCommandBufferKernelUpdate(Dev, Supported)); return ReturnValue(Supported); } + case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: + return ReturnValue(false); default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 100bb888cf..17279cc429 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -308,6 +308,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; + 
pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; + pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; return retVal; } diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 7f37c23417..9e929a55e0 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6499,8 +6499,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. 
) { @@ -6520,7 +6529,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pLocalWorkSize, &numSyncPointsInWaitList, &pSyncPointWaitList, + &numEventsInWaitList, + &phEventWaitList, &pSyncPoint, + &phEvent, &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, @@ -6530,8 +6542,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, - phCommand); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, pSyncPoint, phEvent, phCommand); getContext()->notify_end( UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, @@ -6558,8 +6570,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendUSMMemcpyExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -6569,17 +6592,27 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( } ur_command_buffer_append_usm_memcpy_exp_params_t params = { - &hCommandBuffer, &pDst, &pSrc, &size, &numSyncPointsInWaitList, - &pSyncPointWaitList, &pSyncPoint}; + &hCommandBuffer, + &pDst, + &pSrc, + &size, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP, "urCommandBufferAppendUSMMemcpyExp", &params); getContext()->logger.info("---> urCommandBufferAppendUSMMemcpyExp"); - ur_result_t result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, - numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + ur_result_t result = pfnAppendUSMMemcpyExp( + hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); getContext()->notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP, "urCommandBufferAppendUSMMemcpyExp", &params, @@ -6608,8 +6641,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command.
+ pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendUSMFillExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; @@ -6619,9 +6663,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } ur_command_buffer_append_usm_fill_exp_params_t params = { - &hCommandBuffer, &pMemory, &pPattern, - &patternSize, &size, &numSyncPointsInWaitList, - &pSyncPointWaitList, &pSyncPoint}; + &hCommandBuffer, + &pMemory, + &pPattern, + &patternSize, + &size, + &numSyncPointsInWaitList, + &pSyncPointWaitList, + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP, "urCommandBufferAppendUSMFillExp", &params); @@ -6630,7 +6683,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( ur_result_t result = pfnAppendUSMFillExp( hCommandBuffer, pMemory, pPattern, patternSize, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); getContext()->notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP, "urCommandBufferAppendUSMFillExp", &params, @@ -6659,8 +6713,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferCopyExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -6678,7 +6743,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( &size, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP, "urCommandBufferAppendMemBufferCopyExp", &params); @@ -6687,7 +6756,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_result_t result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); getContext()->notify_end( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP, @@ -6717,8 +6787,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on.
May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferWriteExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -6735,7 +6816,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( &pSrc, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP, "urCommandBufferAppendMemBufferWriteExp", &params); @@ -6744,7 +6829,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_result_t result = pfnAppendMemBufferWriteExp( hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); getContext()->notify_end( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP, @@ -6773,8 +6859,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list
of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferReadExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -6791,7 +6888,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( &pDst, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP, "urCommandBufferAppendMemBufferReadExp", &params); @@ -6800,7 +6901,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_result_t result = pfnAppendMemBufferReadExp( hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); getContext()->notify_end( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_EXP, @@ -6837,8 +6939,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( const ur_exp_command_buffer_sync_point_t *
pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferCopyRectExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -6860,7 +6973,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( &dstSlicePitch, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP, "urCommandBufferAppendMemBufferCopyRectExp", &params); @@ -6870,7 +6987,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_result_t result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); getContext()->notify_end(
UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_RECT_EXP, @@ -6914,8 +7032,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendMemBufferWriteRectExp = getContext() @@ -6938,7 +7067,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( &pSrc, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP, "urCommandBufferAppendMemBufferWriteRectExp", &params); @@ -6949,7 +7082,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_result_t result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); getContext()->notify_end( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP, @@ -6991,8 +7125,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command.
+ ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferReadRectExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -7014,7 +7159,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( &pDst, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP, "urCommandBufferAppendMemBufferReadRectExp", &params); @@ -7024,7 +7173,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_result_t result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); getContext()->notify_end( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP, @@ -7056,8 +7206,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events.
ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferFillExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; @@ -7075,7 +7236,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( &size, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP, "urCommandBufferAppendMemBufferFillExp", &params); @@ -7084,7 +7249,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( ur_result_t result = pfnAppendMemBufferFillExp( hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); getContext()->notify_end( UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP, @@ -7112,8 +7278,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution.
If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendUSMPrefetchExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; @@ -7129,7 +7306,11 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( &flags, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP, "urCommandBufferAppendUSMPrefetchExp", &params); @@ -7138,7 +7319,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_result_t result = pfnAppendUSMPrefetchExp( hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); getContext()->notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP, "urCommandBufferAppendUSMPrefetchExp", &params, @@ -7165,8 +7347,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list.
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendUSMAdviseExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; @@ -7182,16 +7375,21 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( &advice, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &numEventsInWaitList, + &phEventWaitList, + &pSyncPoint, + &phEvent, + &phCommand}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP, "urCommandBufferAppendUSMAdviseExp", &params); getContext()->logger.info("---> urCommandBufferAppendUSMAdviseExp"); - ur_result_t result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, - advice, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + ur_result_t result = pfnAppendUSMAdviseExp( + hCommandBuffer, pMemory, size, advice, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); getContext()->notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP, "urCommandBufferAppendUSMAdviseExp", &params, @@ -7358,6 +7556,83 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept
function for urCommandBufferUpdateSignalEventExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + ur_event_handle_t *phSignalEvent ///< [out] Event to be signaled. +) { + auto pfnUpdateSignalEventExp = + getContext()->urDdiTable.CommandBufferExp.pfnUpdateSignalEventExp; + + if (nullptr == pfnUpdateSignalEventExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_update_signal_event_exp_params_t params = { + &hCommand, &phSignalEvent}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP, + "urCommandBufferUpdateSignalEventExp", &params); + + getContext()->logger.info("---> urCommandBufferUpdateSignalEventExp"); + + ur_result_t result = pfnUpdateSignalEventExp(hCommand, phSignalEvent); + + getContext()->notify_end(UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP, + "urCommandBufferUpdateSignalEventExp", &params, + &result, instance); + + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP, &params); + getContext()->logger.info("({}) -> {};\n", args_str.str(), result); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateWaitEventsExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events.
+) { + auto pfnUpdateWaitEventsExp = + getContext()->urDdiTable.CommandBufferExp.pfnUpdateWaitEventsExp; + + if (nullptr == pfnUpdateWaitEventsExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_update_wait_events_exp_params_t params = { + &hCommand, &numEventsInWaitList, &phEventWaitList}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP, + "urCommandBufferUpdateWaitEventsExp", &params); + + getContext()->logger.info("---> urCommandBufferUpdateWaitEventsExp"); + + ur_result_t result = + pfnUpdateWaitEventsExp(hCommand, numEventsInWaitList, phEventWaitList); + + getContext()->notify_end(UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP, + "urCommandBufferUpdateWaitEventsExp", &params, + &result, instance); + + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP, &params); + getContext()->logger.info("({}) -> {};\n", args_str.str(), result); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferGetInfoExp __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( @@ -8312,6 +8587,14 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnUpdateKernelLaunchExp = ur_tracing_layer::urCommandBufferUpdateKernelLaunchExp; + dditable.pfnUpdateSignalEventExp = pDdiTable->pfnUpdateSignalEventExp; + pDdiTable->pfnUpdateSignalEventExp = + ur_tracing_layer::urCommandBufferUpdateSignalEventExp; + + dditable.pfnUpdateWaitEventsExp = pDdiTable->pfnUpdateWaitEventsExp; + pDdiTable->pfnUpdateWaitEventsExp = + ur_tracing_layer::urCommandBufferUpdateWaitEventsExp; + dditable.pfnGetInfoExp = pDdiTable->pfnGetInfoExp; pDdiTable->pfnGetInfoExp = ur_tracing_layer::urCommandBufferGetInfoExp; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index
1185a92dba..68d3864960 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8060,8 +8060,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. 
) { @@ -8096,6 +8105,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } if (getContext()->enableLifetimeValidation && @@ -8105,8 +8130,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, - phCommand); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, pSyncPoint, phEvent, phCommand); return result; } @@ -8124,8 +8149,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. 
+ ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendUSMMemcpyExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -8158,11 +8194,28 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } - ur_result_t result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, - numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + ur_result_t result = pfnAppendUSMMemcpyExp( + hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); return result; } @@ -8182,8 +8235,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. 
If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendUSMFillExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; @@ -8224,11 +8288,28 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnAppendUSMFillExp( hCommandBuffer, pMemory, pPattern, patternSize, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); return result; } @@ -8248,8 +8329,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferCopyExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -8278,6 +8370,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } if (getContext()->enableLifetimeValidation && @@ -8292,7 +8400,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_result_t result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); return result; } @@ -8312,8 
+8421,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendMemBufferWriteExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -8342,6 +8462,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } if (getContext()->enableLifetimeValidation && @@ -8351,7 +8487,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_result_t result = pfnAppendMemBufferWriteExp( hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); return result; } @@ -8370,8 +8507,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. 
+ pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferReadExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -8400,6 +8548,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } if (getContext()->enableLifetimeValidation && @@ -8409,7 +8573,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_result_t result = pfnAppendMemBufferReadExp( hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); return result; } @@ -8436,8 +8601,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferCopyRectExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -8466,6 +8642,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } if (getContext()->enableLifetimeValidation && @@ -8481,7 +8673,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_result_t result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + 
phEventWaitList, pSyncPoint, phEvent, phCommand); return result; } @@ -8514,8 +8707,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendMemBufferWriteRectExp = getContext() @@ -8545,6 +8749,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } if (getContext()->enableLifetimeValidation && @@ -8555,7 +8775,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_result_t result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); return result; } @@ -8586,8 +8807,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. 
ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferReadRectExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -8616,6 +8848,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } if (getContext()->enableLifetimeValidation && @@ -8626,7 +8874,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_result_t result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); return result; } @@ -8647,8 +8896,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list 
of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendMemBufferFillExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendMemBufferFillExp; @@ -8677,6 +8937,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } if (getContext()->enableLifetimeValidation && @@ -8686,7 +8962,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( ur_result_t result = pfnAppendMemBufferFillExp( hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, - numSyncPointsInWaitList, 
pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); return result; } @@ -8704,8 +8981,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendUSMPrefetchExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMPrefetchExp; @@ -8738,11 +9026,28 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( if (size == 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } ur_result_t result = pfnAppendUSMPrefetchExp( hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); return result; } @@ -8760,8 +9065,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. 
+ ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendUSMAdviseExp = getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; @@ -8794,11 +9110,28 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( if (size == 0) { return UR_RESULT_ERROR_INVALID_SIZE; } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } } - ur_result_t result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, - advice, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + ur_result_t result = pfnAppendUSMAdviseExp( + hCommandBuffer, pMemory, size, advice, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); return result; } @@ -8942,6 +9275,81 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateSignalEventExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + ur_event_handle_t *phSignalEvent ///< [out] Event to be signaled. 
+) { + auto pfnUpdateSignalEventExp = + getContext()->urDdiTable.CommandBufferExp.pfnUpdateSignalEventExp; + + if (nullptr == pfnUpdateSignalEventExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == phSignalEvent) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = pfnUpdateSignalEventExp(hCommand, phSignalEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateWaitEventsExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. 
+) { + auto pfnUpdateWaitEventsExp = + getContext()->urDdiTable.CommandBufferExp.pfnUpdateWaitEventsExp; + + if (nullptr == pfnUpdateWaitEventsExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + ur_result_t result = + pfnUpdateWaitEventsExp(hCommand, numEventsInWaitList, phEventWaitList); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferGetInfoExp __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( @@ -9995,6 +10403,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnUpdateKernelLaunchExp = ur_validation_layer::urCommandBufferUpdateKernelLaunchExp; + dditable.pfnUpdateSignalEventExp = pDdiTable->pfnUpdateSignalEventExp; + pDdiTable->pfnUpdateSignalEventExp = + ur_validation_layer::urCommandBufferUpdateSignalEventExp; + + dditable.pfnUpdateWaitEventsExp = pDdiTable->pfnUpdateWaitEventsExp; + pDdiTable->pfnUpdateWaitEventsExp = + ur_validation_layer::urCommandBufferUpdateWaitEventsExp; + dditable.pfnGetInfoExp = pDdiTable->pfnGetInfoExp; pDdiTable->pfnGetInfoExp = ur_validation_layer::urCommandBufferGetInfoExp; diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index 5e628b4faf..3ea804c1a2 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -45,6 +45,8 @@ EXPORTS urCommandBufferRetainCommandExp 
urCommandBufferRetainExp urCommandBufferUpdateKernelLaunchExp + urCommandBufferUpdateSignalEventExp + urCommandBufferUpdateWaitEventsExp urContextCreate urContextCreateWithNativeHandle urContextGetInfo @@ -222,6 +224,8 @@ EXPORTS urPrintCommandBufferRetainCommandExpParams urPrintCommandBufferRetainExpParams urPrintCommandBufferUpdateKernelLaunchExpParams + urPrintCommandBufferUpdateSignalEventExpParams + urPrintCommandBufferUpdateWaitEventsExpParams urPrintContextCreateParams urPrintContextCreateWithNativeHandleParams urPrintContextFlags diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 18e4018aee..776c7962a7 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -45,6 +45,8 @@ urCommandBufferRetainCommandExp; urCommandBufferRetainExp; urCommandBufferUpdateKernelLaunchExp; + urCommandBufferUpdateSignalEventExp; + urCommandBufferUpdateWaitEventsExp; urContextCreate; urContextCreateWithNativeHandle; urContextGetInfo; @@ -222,6 +224,8 @@ urPrintCommandBufferRetainCommandExpParams; urPrintCommandBufferRetainExpParams; urPrintCommandBufferUpdateKernelLaunchExpParams; + urPrintCommandBufferUpdateSignalEventExpParams; + urPrintCommandBufferUpdateWaitEventsExpParams; urPrintContextCreateParams; urPrintContextCreateWithNativeHandleParams; urPrintContextFlags; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 56e16b769d..767ca87d1f 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7111,8 +7111,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ) { @@ -7138,16 +7147,36 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( // convert loader handle to platform handle hKernel = reinterpret_cast(hKernel)->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) { return result; } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + try { // convert platform handle to loader handle if (nullptr != phCommand) { @@ -7176,8 +7205,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( const 
ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7198,10 +7238,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( reinterpret_cast(hCommandBuffer) ->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform - result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, - numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + result = pfnAppendUSMMemcpyExp( + hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), + pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + 
context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7221,8 +7297,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7243,10 +7330,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( reinterpret_cast(hCommandBuffer) ->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform - result = pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, patternSize, - size, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + result = pfnAppendUSMFillExp( + hCommandBuffer, pMemory, pPattern, patternSize, size, + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7266,8 +7389,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7294,10 +7428,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( // convert loader handle to platform handle hDstMem = reinterpret_cast(hDstMem)->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + 
reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7317,8 +7487,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7342,10 +7523,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( // convert loader handle to platform handle hBuffer = reinterpret_cast(hBuffer)->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform - result = pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, - pSrc, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + result = pfnAppendMemBufferWriteExp( + hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), + pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7364,8 +7581,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7389,10 +7617,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( // convert loader handle to platform handle hBuffer = reinterpret_cast(hBuffer)->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform - result = pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, - pDst, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + result = pfnAppendMemBufferReadExp( + hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), + pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to 
loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7419,8 +7683,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7447,11 +7722,47 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( // convert loader handle to platform handle hDstMem = reinterpret_cast(hDstMem)->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7484,8 +7795,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7509,11 +7831,47 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( // convert loader handle to platform handle hBuffer = reinterpret_cast(hBuffer)->handle; - // forward to device-platform - result = pfnAppendMemBufferWriteRectExp( - hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, - bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnAppendMemBufferWriteRectExp( + hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, + bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = 
reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7544,8 +7902,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7569,11 +7938,47 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( // convert loader handle to platform handle hBuffer = reinterpret_cast(hBuffer)->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7594,8 +7999,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7619,10 +8035,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( // convert loader handle to platform handle hBuffer = reinterpret_cast(hBuffer)->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform result = pfnAppendMemBufferFillExp( hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + 
reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7640,8 +8092,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7662,10 +8125,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( reinterpret_cast(hCommandBuffer) ->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform - result = pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, - numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + result = pfnAppendUSMPrefetchExp( + hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), + pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -7683,8 +8182,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. 
If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7705,10 +8215,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( reinterpret_cast(hCommandBuffer) ->handle; + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + // forward to device-platform - result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, - numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + result = pfnAppendUSMAdviseExp( + hCommandBuffer, pMemory, size, advice, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), + pSyncPoint, phEvent, phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + context->factories.ur_exp_command_buffer_command_factory + .getInstance(*phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + 
} return result; } @@ -7897,6 +8443,96 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateSignalEventExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + ur_event_handle_t *phSignalEvent ///< [out] Event to be signaled. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnUpdateSignalEventExp = + dditable->ur.CommandBufferExp.pfnUpdateSignalEventExp; + if (nullptr == pfnUpdateSignalEventExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // forward to device-platform + result = pfnUpdateSignalEventExp(hCommand, phSignalEvent); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + *phSignalEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phSignalEvent, + dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateWaitEventsExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnUpdateWaitEventsExp = + dditable->ur.CommandBufferExp.pfnUpdateWaitEventsExp; + if (nullptr == pfnUpdateWaitEventsExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUpdateWaitEventsExp(hCommand, numEventsInWaitList, + phEventWaitListLocal.data()); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferGetInfoExp __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( @@ -8835,6 +9471,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( ur_loader::urCommandBufferReleaseCommandExp; pDdiTable->pfnUpdateKernelLaunchExp = ur_loader::urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnUpdateSignalEventExp = + ur_loader::urCommandBufferUpdateSignalEventExp; + pDdiTable->pfnUpdateWaitEventsExp = + ur_loader::urCommandBufferUpdateWaitEventsExp; pDdiTable->pfnGetInfoExp = ur_loader::urCommandBufferGetInfoExp; pDdiTable->pfnCommandGetInfoExp = ur_loader::urCommandBufferCommandGetInfoExp; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 
05b0c71995..dddcb18c25 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7529,6 +7529,13 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( @@ -7547,8 +7554,17 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. 
ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ) try { @@ -7559,10 +7575,10 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, phCommand); + return pfnAppendKernelLaunchExp( + hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, pSyncPoint, phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -7589,6 +7605,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( @@ -7602,8 +7625,19 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendUSMMemcpyExp = ur_lib::getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7613,7 +7647,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( return pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -7642,6 +7677,13 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( @@ -7657,8 +7699,19 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendUSMFillExp = ur_lib::getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMFillExp; @@ -7668,7 +7721,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( return pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, patternSize, size, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); } catch (...) 
{ return exceptionToResult(std::current_exception()); } @@ -7691,6 +7745,13 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( @@ -7706,8 +7767,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. 
+ ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendMemBufferCopyExp = ur_lib::getContext() @@ -7718,7 +7790,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( return pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -7742,6 +7815,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( @@ -7757,8 +7837,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendMemBufferWriteExp = ur_lib::getContext() @@ -7767,9 +7858,10 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, - pSrc, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + return pfnAppendMemBufferWriteExp( + hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -7793,6 +7885,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( @@ -7807,8 +7906,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendMemBufferReadExp = ur_lib::getContext() @@ -7817,9 +7927,10 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, - pDst, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + return pfnAppendMemBufferReadExp( + hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); } catch (...) 
{ return exceptionToResult(std::current_exception()); } @@ -7842,6 +7953,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( @@ -7864,8 +7982,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. 
+ ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendMemBufferCopyRectExp = ur_lib::getContext() @@ -7877,7 +8006,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( return pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -7901,6 +8031,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( @@ -7929,8 +8066,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendMemBufferWriteRectExp = ur_lib::getContext() @@ -7942,7 +8090,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( return pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -7966,6 +8115,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( @@ -7992,8 +8148,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendMemBufferReadRectExp = ur_lib::getContext() @@ -8005,7 +8172,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); } catch (...) 
{ return exceptionToResult(std::current_exception()); } @@ -8031,6 +8199,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( @@ -8047,8 +8222,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. 
+ ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendMemBufferFillExp = ur_lib::getContext() @@ -8059,7 +8245,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( return pfnAppendMemBufferFillExp( hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -8092,6 +8279,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `size == 0` /// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( @@ -8105,8 +8299,19 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. 
If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendUSMPrefetchExp = ur_lib::getContext() @@ -8117,7 +8322,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( return pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -8150,6 +8356,13 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `size == 0` /// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( @@ -8163,8 +8376,19 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendUSMAdviseExp = ur_lib::getContext()->urDdiTable.CommandBufferExp.pfnAppendUSMAdviseExp; @@ -8174,7 +8398,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } @@ -8283,9 +8508,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Update a kernel launch command in a finalized command-buffer. 
This -/// entry-point is synchronous and may block if the command-buffer is -/// executing when the entry-point is called. +/// @brief Update a kernel launch command in a finalized command-buffer. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8305,7 +8528,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. -/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - "If `hCommand` is not a kernel execution command." /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE @@ -8333,6 +8556,92 @@ ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get a new event that will be signaled the next time the command in the +/// command-buffer executes. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phSignalEvent` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If COMMAND_BUFFER_EVENT_SUPPORT_EXP is not supported by the device associated with `hCommand`. 
+/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If no `phEvent` parameter was set on creation of the command associated with `hCommand`. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + ur_event_handle_t *phSignalEvent ///< [out] Event to be signaled. + ) try { + auto pfnUpdateSignalEventExp = + ur_lib::getContext() + ->urDdiTable.CommandBufferExp.pfnUpdateSignalEventExp; + if (nullptr == pfnUpdateSignalEventExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUpdateSignalEventExp(hCommand, phSignalEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Set the list of wait events for a command to depend on to a list of +/// new events. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If COMMAND_BUFFER_EVENT_SUPPORT_EXP is not supported by the device associated with `hCommand`. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized.
+/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// + If `numEventsInWaitList` does not match the number of wait events set when the command associated with `hCommand` was created. +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. + ) try { + auto pfnUpdateWaitEventsExp = + ur_lib::getContext() + ->urDdiTable.CommandBufferExp.pfnUpdateWaitEventsExp; + if (nullptr == pfnUpdateWaitEventsExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUpdateWaitEventsExp(hCommand, numEventsInWaitList, + phEventWaitList); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Get command-buffer object information. 
/// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index f9d510e95d..4e092cb38d 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1436,6 +1436,22 @@ ur_result_t urPrintCommandBufferUpdateKernelLaunchExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintCommandBufferUpdateSignalEventExpParams( + const struct ur_command_buffer_update_signal_event_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferUpdateWaitEventsExpParams( + const struct ur_command_buffer_update_wait_events_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintCommandBufferGetInfoExpParams( const struct ur_command_buffer_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index a9f47a0c92..21e4d4a60d 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6385,6 +6385,13 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( @@ -6403,8 +6410,17 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ) { @@ -6434,6 +6450,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. 
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( @@ -6447,8 +6470,19 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6478,6 +6512,13 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( @@ -6493,8 +6534,19 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6518,6 +6570,13 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( @@ -6533,8 +6592,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. 
+ ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6559,6 +6629,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( @@ -6574,8 +6651,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. 
+ ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6600,6 +6688,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( @@ -6614,8 +6709,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. 
+ pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6639,6 +6745,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( @@ -6661,8 +6774,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. 
ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6687,6 +6811,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( @@ -6715,8 +6846,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. 
If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6741,6 +6883,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` /// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( @@ -6767,8 +6916,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
+ const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] Sync point associated with this command. + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6795,6 +6955,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( @@ -6811,8 +6978,19 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. 
+ uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6846,6 +7024,13 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `size == 0` /// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( @@ -6859,8 +7044,19 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. 
+ uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6894,6 +7090,13 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `size == 0` /// + If `size` is higher than the allocation size of `pMemory` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the device associated with `hCommandBuffer` does not support UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP and either `phEvent` or `phEventWaitList` are not NULL. /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( @@ -6907,8 +7110,19 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. 
+ uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. ur_exp_command_buffer_sync_point_t * - pSyncPoint ///< [out][optional] sync point associated with this command. + pSyncPoint, ///< [out][optional] sync point associated with this command. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that will be signaled by the + ///< completion of this command in the next execution of the + ///< command-buffer. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6996,9 +7210,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Update a kernel launch command in a finalized command-buffer. This -/// entry-point is synchronous and may block if the command-buffer is -/// executing when the entry-point is called. +/// @brief Update a kernel launch command in a finalized command-buffer. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7018,7 +7230,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value when `hCommand` was created with a NULL local work size. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value when `hCommand` was created with a non-NULL local work size. 
-/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - "If `hCommand` is not a kernel execution command." /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE @@ -7038,6 +7250,75 @@ ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get a new event that will be signaled the next time the command in the +/// command-buffer executes. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phSignalEvent` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If COMMAND_BUFFER_EVENT_SUPPORT_EXP is not supported by the device associated with `hCommand`. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If no `phEvent` parameter was set on creation of the command associated with `hCommand`. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + ur_event_handle_t *phSignalEvent ///< [out] Event to be signaled.
+) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Set the list of wait events for a command to depend on to a list of +/// new events. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If COMMAND_BUFFER_EVENT_SUPPORT_EXP is not supported by the device associated with `hCommand`. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// + If `numEventsInWaitList` does not match the number of wait events set when the command associated with `hCommand` was created. +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer command to update. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. + const ur_event_handle_t * + phEventWaitList ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the command execution. If nullptr, + ///< the numEventsInWaitList must be 0, indicating no wait events. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Get command-buffer object information. /// diff --git a/test/conformance/exp_command_buffer/CMakeLists.txt b/test/conformance/exp_command_buffer/CMakeLists.txt index a28d692d9b..534e1cd88f 100644 --- a/test/conformance/exp_command_buffer/CMakeLists.txt +++ b/test/conformance/exp_command_buffer/CMakeLists.txt @@ -14,4 +14,6 @@ add_conformance_test_with_kernels_environment(exp_command_buffer invalid_update.cpp commands.cpp fill.cpp + event_sync_kernel_command.cpp + event_sync.cpp ) diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp index 78e1ffd009..70315c8955 100644 --- a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -49,7 +49,8 @@ struct BufferFillCommandTest // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp index 55e6773cb7..5cfa7934c2 100644 --- a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -83,7 +83,8 @@ struct BufferSaxpyKernelTest // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, 
kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); diff --git a/test/conformance/exp_command_buffer/commands.cpp b/test/conformance/exp_command_buffer/commands.cpp index 412e4ab6de..b1027ef136 100644 --- a/test/conformance/exp_command_buffer/commands.cpp +++ b/test/conformance/exp_command_buffer/commands.cpp @@ -57,20 +57,20 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferCommandsTest); TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMMemcpyExp) { ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( cmd_buf_handle, device_ptrs[0], device_ptrs[1], allocation_size, 0, - nullptr, nullptr)); + nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMFillExp) { uint32_t pattern = 42; ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( cmd_buf_handle, device_ptrs[0], &pattern, sizeof(pattern), - allocation_size, 0, nullptr, nullptr)); + allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferCopyExp) { ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp( cmd_buf_handle, buffers[0], buffers[1], 0, 0, allocation_size, 0, - nullptr, nullptr)); + nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferCopyRectExp) { @@ -78,14 +78,14 @@ TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferCopyRectExp) { ur_rect_region_t region{4, 4, 1}; ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyRectExp( cmd_buf_handle, buffers[0], buffers[1], origin, origin, region, 4, 16, - 4, 16, 0, nullptr, nullptr)); + 4, 16, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, 
urCommandBufferAppendMemBufferReadExp) { std::array host_data{}; ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, - nullptr, nullptr)); + nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferReadRectExp) { @@ -94,14 +94,14 @@ TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferReadRectExp) { ur_rect_region_t region{4, 4, 1}; ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, - host_data.data(), 0, nullptr, nullptr)); + host_data.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferWriteExp) { std::array host_data{}; ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteExp( cmd_buf_handle, buffers[0], 0, allocation_size, host_data.data(), 0, - nullptr, nullptr)); + nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, @@ -111,26 +111,26 @@ TEST_P(urCommandBufferCommandsTest, ur_rect_region_t region{4, 4, 1}; ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteRectExp( cmd_buf_handle, buffers[0], origin, origin, region, 4, 16, 4, 16, - host_data.data(), 0, nullptr, nullptr)); + host_data.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendMemBufferFillExp) { uint32_t pattern = 42; ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( cmd_buf_handle, buffers[0], &pattern, sizeof(pattern), 0, - allocation_size, 0, nullptr, nullptr)); + allocation_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMPrefetchExp) { ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( - cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, - nullptr)); + cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, 0, + 
nullptr, nullptr, nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMAdviseExp) { ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( - cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, - nullptr)); + cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, 0, + nullptr, nullptr, nullptr, nullptr)); } struct urCommandBufferAppendKernelLaunchExpTest @@ -188,7 +188,7 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferAppendKernelLaunchExpTest); TEST_P(urCommandBufferAppendKernelLaunchExpTest, Basic) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, - &local_size, 0, nullptr, nullptr, nullptr)); + &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); diff --git a/test/conformance/exp_command_buffer/event_sync.cpp b/test/conformance/exp_command_buffer/event_sync.cpp new file mode 100644 index 0000000000..7e8e75c4cd --- /dev/null +++ b/test/conformance/exp_command_buffer/event_sync.cpp @@ -0,0 +1,1438 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" + +// Tests non-kernel commands using ur events for synchronization work as expected +struct CommandEventSyncTest : uur::command_buffer::urCommandBufferExpTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpTest::SetUp()); + + ur_bool_t event_support = false; + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP, + sizeof(ur_bool_t), &event_support, nullptr)); + if (!event_support) { + GTEST_SKIP() << "External event sync is not supported by device."; + } + + ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED; + ur_queue_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, + /*.pNext =*/nullptr, + /*.flags =*/flags, + }; + ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue)); + ASSERT_NE(queue, nullptr); + + for (auto &device_ptr : device_ptrs) { + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, + allocation_size, &device_ptr)); + ASSERT_NE(device_ptr, nullptr); + } + + for (auto &buffer : buffers) { + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + allocation_size, nullptr, + &buffer)); + ASSERT_NE(buffer, nullptr); + } + + // Create a command-buffer with update enabled. 
+ ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + &second_cmd_buf_handle)); + ASSERT_NE(second_cmd_buf_handle, nullptr); + } + + virtual void TearDown() override { + for (auto &device_ptr : device_ptrs) { + if (device_ptr) { + EXPECT_SUCCESS(urUSMFree(context, device_ptr)); + } + } + + for (auto &event : external_events) { + if (event) { + EXPECT_SUCCESS(urEventRelease(event)); + } + } + + for (auto &buffer : buffers) { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + } + + if (queue) { + EXPECT_SUCCESS(urQueueRelease(queue)); + } + + if (second_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(second_cmd_buf_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpTest::TearDown()); + } + + std::array device_ptrs = {nullptr, nullptr, nullptr}; + std::array buffers = {nullptr, nullptr}; + std::array external_events = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}; + std::array sync_points = {0, 0}; + ur_queue_handle_t queue = nullptr; + ur_exp_command_buffer_handle_t second_cmd_buf_handle = nullptr; + static constexpr size_t elements = 64; + static constexpr size_t allocation_size = sizeof(uint32_t) * elements; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(CommandEventSyncTest); + +TEST_P(CommandEventSyncTest, USMMemcpyExp) { + // Get wait event from queue fill on ptr 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Command to fill ptr 1 + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[1], &patternY, sizeof(patternY), + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // Test command overwriting ptr 1 with ptr 0 command based on queue 
event + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + cmd_buf_handle, device_ptrs[1], device_ptrs[0], allocation_size, 1, + &sync_points[0], 1, &external_events[0], nullptr, &external_events[1], + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue read ptr 1 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[1], allocation_size, 1, + &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } +} + +TEST_P(CommandEventSyncTest, USMFillExp) { + // Get wait event from queue fill on ptr 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Test fill command overwriting ptr 0 waiting on queue event + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[0], &patternY, sizeof(patternY), + allocation_size, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue read ptr 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncTest, MemBufferCopyExp) { + // Get wait event from queue fill on buffer 0 + 
uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Command to fill buffer 1 + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + cmd_buf_handle, buffers[1], &patternY, sizeof(patternY), 0, + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // Test command overwriting buffer 1 with buffer 0 command based on queue event + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp( + cmd_buf_handle, buffers[0], buffers[1], 0, 0, allocation_size, 1, + &sync_points[0], 1, &external_events[0], nullptr, &external_events[1], + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue read buffer 1 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[1], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } +} + +TEST_P(CommandEventSyncTest, MemBufferCopyRectExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Command to fill buffer 1 + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + cmd_buf_handle, buffers[1], &patternY, sizeof(patternY), 0, + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // Test command overwriting buffer 1 with buffer 0 command based on queue event + ur_rect_offset_t src_origin{0, 0, 0}; + ur_rect_offset_t dst_origin{0, 0, 0}; + constexpr 
size_t rect_buffer_row_size = 16; + ur_rect_region_t region{rect_buffer_row_size, rect_buffer_row_size, 1}; + size_t src_row_pitch = rect_buffer_row_size; + size_t src_slice_pitch = allocation_size; + size_t dst_row_pitch = rect_buffer_row_size; + size_t dst_slice_pitch = allocation_size; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyRectExp( + cmd_buf_handle, buffers[0], buffers[1], src_origin, dst_origin, region, + src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, 1, + &sync_points[0], 1, &external_events[0], nullptr, &external_events[1], + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue read buffer 1 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[1], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } +} + +TEST_P(CommandEventSyncTest, MemBufferReadExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test command reading buffer 0 based on queue event + std::array host_command_ptr{}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( + cmd_buf_handle, buffers[0], 0, allocation_size, host_command_ptr.data(), + 0, nullptr, 1, &external_events[0], nullptr, &external_events[1], + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Overwrite buffer 0 based on event returned from command-buffer command, + // then read back to verify ordering + 
uint32_t patternY = 0xA; + ASSERT_SUCCESS(urEnqueueMemBufferFill( + queue, buffers[0], &patternY, sizeof(patternY), 0, allocation_size, 1, + &external_events[1], &external_events[2])); + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[2], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_command_ptr[i], patternX); + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncTest, MemBufferReadRectExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test command reading buffer 0 based on queue event + std::array host_command_ptr{}; + ur_rect_offset_t buffer_offset = {0, 0, 0}; + ur_rect_offset_t host_offset = {0, 0, 0}; + constexpr size_t rect_buffer_row_size = 16; + ur_rect_region_t region = {rect_buffer_row_size, rect_buffer_row_size, 1}; + size_t buffer_row_pitch = rect_buffer_row_size; + size_t buffer_slice_pitch = allocation_size; + size_t host_row_pitch = rect_buffer_row_size; + size_t host_slice_pitch = allocation_size; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( + cmd_buf_handle, buffers[0], buffer_offset, host_offset, region, + buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, + host_command_ptr.data(), 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Overwrite buffer 0 based on event returned from command-buffer command, + // then read back to verify ordering + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urEnqueueMemBufferFill( + queue, buffers[0], 
&patternY, sizeof(patternY), 0, allocation_size, 1, + &external_events[1], &external_events[2])); + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[2], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_command_ptr[i], patternX); + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncTest, MemBufferWriteExp) { + + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test command overwriting buffer 0 based on queue event + std::array host_command_ptr{}; + uint32_t patternY = 0xA; + std::fill(host_command_ptr.begin(), host_command_ptr.end(), patternY); + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteExp( + cmd_buf_handle, buffers[0], 0, allocation_size, host_command_ptr.data(), + 0, nullptr, 1, &external_events[0], nullptr, &external_events[1], + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Read back buffer 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY) << i; + } +} + +TEST_P(CommandEventSyncTest, MemBufferWriteRectExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, 
&external_events[0])); + + // Test command overwriting buffer 0 based on queue event + std::array host_command_ptr{}; + uint32_t patternY = 0xA; + std::fill(host_command_ptr.begin(), host_command_ptr.end(), patternY); + + ur_rect_offset_t buffer_offset = {0, 0, 0}; + ur_rect_offset_t host_offset = {0, 0, 0}; + constexpr size_t rect_buffer_row_size = 16; + ur_rect_region_t region = {rect_buffer_row_size, rect_buffer_row_size, 1}; + size_t buffer_row_pitch = rect_buffer_row_size; + size_t buffer_slice_pitch = allocation_size; + size_t host_row_pitch = rect_buffer_row_size; + size_t host_slice_pitch = allocation_size; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteRectExp( + cmd_buf_handle, buffers[0], buffer_offset, host_offset, region, + buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, + host_command_ptr.data(), 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Read back buffer 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY) << i; + } +} + +TEST_P(CommandEventSyncTest, MemBufferFillExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test fill command overwriting buffer 0 based on queue event + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + cmd_buf_handle, buffers[0], &patternY, sizeof(patternY), 0, + 
allocation_size, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue read buffer 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncTest, USMPrefetchExp) { + // Get wait event from queue fill on ptr 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Test prefetch command waiting on queue event (NOTE(review): prefetches ptr 1 while the fill and readback use ptr 0 - confirm this is intended) + ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( + cmd_buf_handle, device_ptrs[1], allocation_size, 0 /* migration flags*/, + 0, nullptr, 1, &external_events[0], nullptr, &external_events[1], + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue read ptr 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } +} + +TEST_P(CommandEventSyncTest, USMAdviseExp) { + // Get wait event from queue fill on ptr 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, +
&external_events[0])); + + // Test advise command waiting on queue event + ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( + cmd_buf_handle, device_ptrs[0], allocation_size, 0 /* advice flags*/, 0, + nullptr, 1, &external_events[0], nullptr, &external_events[1], + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue read ptr 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } +} + +TEST_P(CommandEventSyncTest, MultipleEventCommands) { + // Command to fill ptr 0, signalling event 0 + uint32_t patternA = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[0], &patternA, sizeof(patternA), + allocation_size, 0, nullptr, 0, nullptr, nullptr, &external_events[0], + nullptr)); + + // Command to fill ptr 1, waiting on event 0 and signalling event 1 + uint32_t patternB = 0xB; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[1], &patternB, sizeof(patternB), + allocation_size, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], nullptr)); + + // Command to fill ptr 2, waiting on event 1 and signalling event 2 + uint32_t patternC = 0xC; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[2], &patternC, sizeof(patternC), + allocation_size, 0, nullptr, 1, &external_events[1], nullptr, + &external_events[2], nullptr)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue reads of ptrs 0, 1 and 2, each waiting on the event signalled by the command that filled it + std::array host_enqueue_ptrA, host_enqueue_ptrB, + host_enqueue_ptrC; +
ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrA.data(), + device_ptrs[0], allocation_size, 1, + &external_events[0], nullptr)); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrB.data(), + device_ptrs[1], allocation_size, 1, + &external_events[1], nullptr)); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrC.data(), + device_ptrs[2], allocation_size, 1, + &external_events[2], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptrA[i], patternA); + ASSERT_EQ(host_enqueue_ptrB[i], patternB); + ASSERT_EQ(host_enqueue_ptrC[i], patternC); + } +} + +TEST_P(CommandEventSyncTest, MultipleEventCommandsBetweenCommandBuffers) { + // Command to fill ptr 0, signalling event 0 + uint32_t patternA = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[0], &patternA, sizeof(patternA), + allocation_size, 0, nullptr, 0, nullptr, nullptr, &external_events[0], + nullptr)); + + // Command to fill ptr 1, waiting on event 0 and signalling event 1 + uint32_t patternB = 0xB; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[1], &patternB, sizeof(patternB), + allocation_size, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], nullptr)); + + // Command to fill ptr 2, waiting on event 1 and signalling event 2 + uint32_t patternC = 0xC; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[2], &patternC, sizeof(patternC), + allocation_size, 0, nullptr, 1, &external_events[1], nullptr, + &external_events[2], nullptr)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + + // Append reads of ptrs 0, 1 and 2 to the second command-buffer, each waiting on the event signalled by the command that filled it + std::array host_enqueue_ptrA, host_enqueue_ptrB, + host_enqueue_ptrC; + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + second_cmd_buf_handle, host_enqueue_ptrA.data(), device_ptrs[0], + allocation_size, 0, nullptr, 1, &external_events[0], nullptr, nullptr, + nullptr)); + +
ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + second_cmd_buf_handle, host_enqueue_ptrB.data(), device_ptrs[1], + allocation_size, 0, nullptr, 1, &external_events[1], nullptr, nullptr, + nullptr)); + + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + second_cmd_buf_handle, host_enqueue_ptrC.data(), device_ptrs[2], + allocation_size, 0, nullptr, 1, &external_events[2], nullptr, nullptr, + nullptr)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(second_cmd_buf_handle)); + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(second_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptrA[i], patternA); + ASSERT_EQ(host_enqueue_ptrB[i], patternB); + ASSERT_EQ(host_enqueue_ptrC[i], patternC); + } +} + +// Tests non-kernel commands using ur events for synchronization can be +// updated +struct CommandEventSyncUpdateTest : CommandEventSyncTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(CommandEventSyncTest::SetUp()); + + if (!updatable_command_buffer_support) { + GTEST_SKIP() << "External event update is not supported by device."; + } + + // Create a command-buffer with update enabled. 
+ ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + &updatable_cmd_buf_handle)); + ASSERT_NE(updatable_cmd_buf_handle, nullptr); + } + + virtual void TearDown() override { + for (auto command_handle : command_handles) { + if (command_handle) { + EXPECT_SUCCESS( + urCommandBufferReleaseCommandExp(command_handle)); + } + } + + if (updatable_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE(CommandEventSyncTest::TearDown()); + } + + ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; + std::array command_handles = { + nullptr, nullptr, nullptr}; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(CommandEventSyncUpdateTest); + +TEST_P(CommandEventSyncUpdateTest, USMMemcpyExp) { + // Get wait event from queue fill on ptr 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Command to fill ptr 1 + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + updatable_cmd_buf_handle, device_ptrs[1], &patternY, sizeof(patternY), + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // Test command overwriting ptr 1 with ptr 0 command based on queue event + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + updatable_cmd_buf_handle, device_ptrs[1], device_ptrs[0], + allocation_size, 1, &sync_points[0], 1, &external_events[0], nullptr, + &external_events[1], &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue read ptr 1 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + 
ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[1], allocation_size, 1, + &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternZ), + &patternZ, allocation_size, 0, nullptr, + &external_events[2])); + + // Update the command's wait event to the event of the queue fill writing the new value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for the command + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[1], allocation_size, 1, + &external_events[3], nullptr)); + + // Verify update: the re-run copy propagated the new value from ptr 0 to ptr 1 + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternZ); + } +} + +TEST_P(CommandEventSyncUpdateTest, USMFillExp) { + // Get wait event from queue fill on ptr 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Test fill command overwriting ptr 0 waiting on queue event + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + updatable_cmd_buf_handle, device_ptrs[0], &patternY, sizeof(patternY), + allocation_size, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue
read ptr 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternZ), + &patternZ, allocation_size, 0, nullptr, + &external_events[2])); + + // Update the command's wait event to the event of the queue fill writing the new value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for the command + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[3], nullptr)); + + // Verify update: the command's fill ran after the patternZ queue fill, so ptr 0 holds patternY again + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncUpdateTest, MemBufferCopyExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Command to fill buffer 1 + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + updatable_cmd_buf_handle, buffers[1], &patternY, sizeof(patternY), 0, + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // Test command overwriting buffer 1 with buffer 0 command based on queue event (NOTE(review): unlike sibling tests, the returned command handle is not null-checked) + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyExp( + updatable_cmd_buf_handle,
buffers[0], buffers[1], 0, 0, allocation_size, + 1, &sync_points[0], 1, &external_events[0], nullptr, + &external_events[1], &command_handles[0])); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue read buffer 1 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[1], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternZ, + sizeof(patternZ), 0, allocation_size, + 0, nullptr, &external_events[2])); + + // Update the command's wait event to the event of the queue fill writing the new value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for the command + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[1], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[3], nullptr)); + + // Verify update: the re-run copy picked up the new value written to buffer 0 + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternZ); + } +} + +TEST_P(CommandEventSyncUpdateTest, MemBufferCopyRectExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Command to fill buffer 1 + uint32_t patternY = 0xA; +
ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + updatable_cmd_buf_handle, buffers[1], &patternY, sizeof(patternY), 0, + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // Test command overwriting buffer 1 with buffer 0 command based on queue event + ur_rect_offset_t src_origin{0, 0, 0}; + ur_rect_offset_t dst_origin{0, 0, 0}; + constexpr size_t rect_buffer_row_size = 16; + ur_rect_region_t region{rect_buffer_row_size, rect_buffer_row_size, 1}; + size_t src_row_pitch = rect_buffer_row_size; + size_t src_slice_pitch = allocation_size; + size_t dst_row_pitch = rect_buffer_row_size; + size_t dst_slice_pitch = allocation_size; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferCopyRectExp( + updatable_cmd_buf_handle, buffers[0], buffers[1], src_origin, + dst_origin, region, src_row_pitch, src_slice_pitch, dst_row_pitch, + dst_slice_pitch, 1, &sync_points[0], 1, &external_events[0], nullptr, + &external_events[1], &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue read buffer 1 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[1], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternZ, + sizeof(patternZ), 0, allocation_size, + 0, nullptr, &external_events[2])); + + // Update the command's wait event to the event of the queue fill writing the new value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for
command-buffer + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[1], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[3], nullptr)); + + // Verify update + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternZ); + } +} + +TEST_P(CommandEventSyncUpdateTest, MemBufferReadExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test command reading buffer 0 based on queue event + std::array host_command_ptr{}; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( + updatable_cmd_buf_handle, buffers[0], 0, allocation_size, + host_command_ptr.data(), 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Overwrite buffer 0 based on event returned from command-buffer command, + // then read back to verify ordering + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urEnqueueMemBufferFill( + queue, buffers[0], &patternY, sizeof(patternY), 0, allocation_size, 1, + &external_events[1], &external_events[2])); + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[2], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_command_ptr[i], patternX); + 
ASSERT_EQ(host_enqueue_ptr[i], patternY); + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternZ, + sizeof(patternZ), 0, allocation_size, + 0, nullptr, &external_events[3])); + + // Update command command-wait event to wait on fill of new value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[3])); + + // Get a new signal event for command-buffer + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[4])); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + uint32_t patternA = 0xF; + ASSERT_SUCCESS(urEnqueueMemBufferFill( + queue, buffers[0], &patternA, sizeof(patternA), 0, allocation_size, 1, + &external_events[4], &external_events[5])); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[5], nullptr)); + + // Verify update + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_command_ptr[i], patternZ); + ASSERT_EQ(host_enqueue_ptr[i], patternA); + } +} + +TEST_P(CommandEventSyncUpdateTest, MemBufferReadRectExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test command reading buffer 0 based on queue event + std::array host_command_ptr{}; + ur_rect_offset_t buffer_offset = {0, 0, 0}; + ur_rect_offset_t host_offset = {0, 0, 0}; + constexpr size_t rect_buffer_row_size = 16; + ur_rect_region_t region = {rect_buffer_row_size, rect_buffer_row_size, 1}; + size_t buffer_row_pitch = rect_buffer_row_size; + size_t buffer_slice_pitch = allocation_size; + size_t host_row_pitch = rect_buffer_row_size; + size_t host_slice_pitch = allocation_size; + 
ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( + updatable_cmd_buf_handle, buffers[0], buffer_offset, host_offset, + region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, + host_slice_pitch, host_command_ptr.data(), 0, nullptr, 1, + &external_events[0], nullptr, &external_events[1], + &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Overwrite buffer 0 based on event returned from command-buffer command, + // then read back to verify ordering + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urEnqueueMemBufferFill( + queue, buffers[0], &patternY, sizeof(patternY), 0, allocation_size, 1, + &external_events[1], &external_events[2])); + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[2], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_command_ptr[i], patternX); + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternZ, + sizeof(patternZ), 0, allocation_size, + 0, nullptr, &external_events[3])); + + // Update command command-wait event to wait on fill of new value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[3])); + + // Get a new signal event for command-buffer + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[4])); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + uint32_t patternA = 0xF; + ASSERT_SUCCESS(urEnqueueMemBufferFill( + queue, buffers[0], &patternA, sizeof(patternA), 0, allocation_size, 1, + &external_events[4], 
&external_events[5])); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[5], nullptr)); + + // Verify update + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_command_ptr[i], patternZ); + ASSERT_EQ(host_enqueue_ptr[i], patternA); + } +} + +TEST_P(CommandEventSyncUpdateTest, MemBufferWriteExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test command overwriting buffer 0 based on queue event + std::array host_command_ptr{}; + uint32_t patternY = 0xA; + std::fill(host_command_ptr.begin(), host_command_ptr.end(), patternY); + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteExp( + updatable_cmd_buf_handle, buffers[0], 0, allocation_size, + host_command_ptr.data(), 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Read back buffer 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY) << i; + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternZ, + sizeof(patternZ), 0, allocation_size, + 0, nullptr, &external_events[2])); + + // Update command command-wait event to wait on fill of new value + 
ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for command-buffer + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[3], nullptr)); + + // Verify update + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncUpdateTest, MemBufferWriteRectExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test command overwriting buffer 0 based on queue event + std::array host_command_ptr{}; + uint32_t patternY = 0xA; + std::fill(host_command_ptr.begin(), host_command_ptr.end(), patternY); + + ur_rect_offset_t buffer_offset = {0, 0, 0}; + ur_rect_offset_t host_offset = {0, 0, 0}; + constexpr size_t rect_buffer_row_size = 16; + ur_rect_region_t region = {rect_buffer_row_size, rect_buffer_row_size, 1}; + size_t buffer_row_pitch = rect_buffer_row_size; + size_t buffer_slice_pitch = allocation_size; + size_t host_row_pitch = rect_buffer_row_size; + size_t host_slice_pitch = allocation_size; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferWriteRectExp( + updatable_cmd_buf_handle, buffers[0], buffer_offset, host_offset, + region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, + host_slice_pitch, host_command_ptr.data(), 0, nullptr, 1, + &external_events[0], nullptr, &external_events[1], + &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + 
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Read back buffer 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY) << i; + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternZ, + sizeof(patternZ), 0, allocation_size, + 0, nullptr, &external_events[2])); + + // Update command command-wait event to wait on fill of new value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for command-buffer + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[3], nullptr)); + + // Verify update + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncUpdateTest, MemBufferFillExp) { + // Get wait event from queue fill on buffer 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternX, + sizeof(patternX), 0, allocation_size, + 0, nullptr, &external_events[0])); + + // Test fill command overwriting buffer 0 based on queue event + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( + updatable_cmd_buf_handle, buffers[0], &patternY, sizeof(patternY), 0, + allocation_size, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], 
&command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue read buffer 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } + + uint32_t patternZ = 666; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, buffers[0], &patternZ, + sizeof(patternZ), 0, allocation_size, + 0, nullptr, &external_events[2])); + + // Update command command-wait event to wait on fill of new value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for command-buffer + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, buffers[0], false, 0, allocation_size, host_enqueue_ptr.data(), + 1, &external_events[3], nullptr)); + + // Verify update + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncUpdateTest, USMPrefetchExp) { + // Get wait event from queue fill on ptr 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Test prefetch command waiting on queue event + ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( + updatable_cmd_buf_handle, device_ptrs[1], allocation_size, + 0 /* migration 
flags*/, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue read ptr 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } + + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternY), + &patternY, allocation_size, 0, nullptr, + &external_events[2])); + + // Update command wait event to wait on fill of new value (distinct from patternX) + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for command-buffer + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[3], nullptr)); + + // Verify update + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncUpdateTest, USMAdviseExp) { + // Get wait event from queue fill on ptr 0 + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Test advise command waiting on queue event + ASSERT_SUCCESS(urCommandBufferAppendUSMAdviseExp( +
updatable_cmd_buf_handle, device_ptrs[0], allocation_size, + 0 /* advice flags*/, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[1], &command_handles[0])); + ASSERT_NE(nullptr, command_handles[0]); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue read ptr 0 based on event returned from command-buffer command + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[1], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternX); + } + + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternY), + &patternY, allocation_size, 0, nullptr, + &external_events[2])); + + // Update command wait event to wait on fill of new value (distinct from patternX) + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[2])); + + // Get a new signal event for command-buffer + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[3])); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[0], allocation_size, 1, + &external_events[3], nullptr)); + + // Verify update + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptr[i], patternY); + } +} + +TEST_P(CommandEventSyncUpdateTest, MultipleEventCommands) { + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + uint32_t patternY = 43; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[1],
sizeof(patternY), + &patternY, allocation_size, 0, nullptr, + &external_events[1])); + + uint32_t patternZ = 44; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[2], sizeof(patternZ), + &patternZ, allocation_size, 0, nullptr, + &external_events[2])); + + // Command to fill ptr 0 + uint32_t patternA = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + updatable_cmd_buf_handle, device_ptrs[0], &patternA, sizeof(patternA), + allocation_size, 0, nullptr, 1, &external_events[0], nullptr, + &external_events[3], &command_handles[0])); + + // Command to fill ptr 1 + uint32_t patternB = 0xB; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + updatable_cmd_buf_handle, device_ptrs[1], &patternB, sizeof(patternB), + allocation_size, 0, nullptr, 1, &external_events[1], nullptr, + &external_events[4], &command_handles[1])); + + // Command to fill ptr 2 + uint32_t patternC = 0xC; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + updatable_cmd_buf_handle, device_ptrs[2], &patternC, sizeof(patternC), + allocation_size, 0, nullptr, 1, &external_events[2], nullptr, + &external_events[5], &command_handles[2])); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue reads of ptrs 0, 1 and 2, each waiting on the event returned from its command-buffer command + std::array host_enqueue_ptrA1, host_enqueue_ptrB1, + host_enqueue_ptrC1; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrA1.data(), + device_ptrs[0], allocation_size, 1, + &external_events[3], nullptr)); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrB1.data(), + device_ptrs[1], allocation_size, 1, + &external_events[4], nullptr)); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrC1.data(), + device_ptrs[2], allocation_size, 1, + &external_events[5], nullptr)); + + // Verify + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { +
ASSERT_EQ(host_enqueue_ptrA1[i], patternA); + ASSERT_EQ(host_enqueue_ptrB1[i], patternB); + ASSERT_EQ(host_enqueue_ptrC1[i], patternC); + } + + uint32_t pattern1 = 1; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(pattern1), + &pattern1, allocation_size, 0, nullptr, + &external_events[6])); + uint32_t pattern2 = 2; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[1], sizeof(pattern2), + &pattern2, allocation_size, 0, nullptr, + &external_events[7])); + + uint32_t pattern3 = 3; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[2], sizeof(pattern3), + &pattern3, allocation_size, 0, nullptr, + &external_events[8])); + + // Update each command's wait event to wait on the queue fill of its own ptr + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[0], 1, + &external_events[6])); + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[1], 1, + &external_events[7])); + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handles[2], 1, + &external_events[8])); + + // Get a new signal event for each command (one per command handle) + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[0], + &external_events[9])); + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[1], + &external_events[10])); + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handles[2], + &external_events[11])); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + std::array host_enqueue_ptrA2, host_enqueue_ptrB2, + host_enqueue_ptrC2; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrA2.data(), + device_ptrs[0], allocation_size, 1, + &external_events[9], nullptr)); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrB2.data(), + device_ptrs[1], allocation_size, 1, + &external_events[10], nullptr)); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptrC2.data(), + device_ptrs[2], allocation_size, 1, + &external_events[11], nullptr)); + // Verify
+ ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + ASSERT_EQ(host_enqueue_ptrA2[i], patternA); + ASSERT_EQ(host_enqueue_ptrB2[i], patternB); + ASSERT_EQ(host_enqueue_ptrC2[i], patternC); + } +} diff --git a/test/conformance/exp_command_buffer/event_sync_kernel_command.cpp b/test/conformance/exp_command_buffer/event_sync_kernel_command.cpp new file mode 100644 index 0000000000..d38b8e8425 --- /dev/null +++ b/test/conformance/exp_command_buffer/event_sync_kernel_command.cpp @@ -0,0 +1,531 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" +#include + +// Tests kernel commands using ur events for synchronization work as expected +struct KernelCommandEventSyncTest + : uur::command_buffer::urCommandBufferExpExecutionTest { + + void SetUp() override { + program_name = "saxpy_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp()); + + ur_bool_t event_support = false; + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP, + sizeof(ur_bool_t), &event_support, nullptr)); + if (!event_support) { + GTEST_SKIP() << "External event sync is not supported by device."; + } + + for (auto &device_ptr : device_ptrs) { + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, + allocation_size, &device_ptr)); + ASSERT_NE(device_ptr, nullptr); + } + + // Index 0 is output + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, device_ptrs[2])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 2, nullptr, device_ptrs[0])); + // Index 3 is Y + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 3, nullptr, device_ptrs[1])); + + // Create second command-buffer + 
ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &second_cmd_buf_handle)); + ASSERT_NE(second_cmd_buf_handle, nullptr); + } + + virtual void TearDown() override { + for (auto &device_ptr : device_ptrs) { + if (device_ptr) { + EXPECT_SUCCESS(urUSMFree(context, device_ptr)); + } + } + + for (auto &event : external_events) { + if (event) { + EXPECT_SUCCESS(urEventRelease(event)); + } + } + + if (second_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(second_cmd_buf_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urCommandBufferExpExecutionTest::TearDown()); + } + + // First two shared points are inputs to be tested, last is an output + // generated by test command. + std::array device_ptrs = {nullptr, nullptr, nullptr}; + std::array external_events = {nullptr, nullptr}; + std::array sync_points = {0, 0}; + ur_exp_command_buffer_handle_t second_cmd_buf_handle = nullptr; + static constexpr size_t elements = 64; + static constexpr size_t global_offset = 0; + static constexpr size_t allocation_size = sizeof(uint32_t) * elements; + static constexpr size_t A = 2; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(KernelCommandEventSyncTest); + +// Tests a Saxpy kernel as follows: +// 1. Enqueue work initializing X data on a queue to get an external ur event +// to wait on. +// 2. Append a command-buffer command initializing Y data, to be a dependency +// of kernel command to test. +// 3. Append a kernel command with both sync point and external +// event dependencies on initialization of X and Y. Returning both a sync +// point and event to synchronize with completion of command. +// 4. Append a command-buffer command that depends on sync point from +// tested command. +// 5. Finalize and enqueue command-buffer. +// 6. Enqueue a queue command waiting on event returned from command. +// 7. Verify results. 
+TEST_P(KernelCommandEventSyncTest, Basic) { + // Initialize data X with queue submission + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Initialize data Y with command-buffer command + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[1], &patternY, sizeof(patternY), + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // Kernel command for SAXPY waiting on command and signal event + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + cmd_buf_handle, kernel, 1, &global_offset, &elements, nullptr, 1, + &sync_points[0], 1, &external_events[0], &sync_points[1], + &external_events[1], nullptr)); + + // command-buffer command that reads output to host + std::array host_command_ptr{}; + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + cmd_buf_handle, host_command_ptr.data(), device_ptrs[2], + allocation_size, 1, &sync_points[1], 0, nullptr, nullptr, nullptr, + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + + // Queue command that reads output to host + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[2], allocation_size, 1, + &external_events[1], nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queue)); + + for (size_t i = 0; i < elements; i++) { + auto ref = (patternX * A) + patternY; + ASSERT_EQ(host_command_ptr[i], ref); + ASSERT_EQ(host_enqueue_ptr[i], ref); + } +} + +// Tests using events to synchronize between command-buffers: +TEST_P(KernelCommandEventSyncTest, InterCommandBuffer) { + // Initialize data X with command-buffer A command + uint32_t patternX = 42; + std::array dataX{}; + std::fill(dataX.begin(), dataX.end(), patternX); + 
ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + cmd_buf_handle, device_ptrs[0], dataX.data(), allocation_size, 0, + nullptr, 0, nullptr, &sync_points[0], nullptr, nullptr)); + + // Initialize data Y with command-buffer A command + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[1], &patternY, sizeof(patternY), + allocation_size, 1, &sync_points[0], 0, nullptr, &sync_points[1], + &external_events[0], nullptr)); + + // Run SAXPY kernel with command-buffer B command, waiting on an event. + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + second_cmd_buf_handle, kernel, 1, &global_offset, &elements, nullptr, 0, + nullptr, 1, &external_events[0], &sync_points[1], nullptr, nullptr)); + + // Command-buffer B command that reads output to host, waiting on the kernel command's sync point + std::array host_command_ptr{}; + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + second_cmd_buf_handle, host_command_ptr.data(), device_ptrs[2], + allocation_size, 1, &sync_points[1], 0, nullptr, nullptr, nullptr, + nullptr)); + + // Finalize command-buffers + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(second_cmd_buf_handle)); + + // Submit command-buffers + ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(second_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Verify execution + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + auto ref = (patternX * A) + patternY; + ASSERT_EQ(host_command_ptr[i], ref) << i; + } + + // Use new data for patternX + patternX = 666; + std::fill(dataX.begin(), dataX.end(), patternX); + + // Submit command-buffers again to check that dependencies still enforced.
+ ASSERT_SUCCESS( + urCommandBufferEnqueueExp(cmd_buf_handle, queue, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(second_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Verify second execution + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + auto ref = (patternX * A) + patternY; + ASSERT_EQ(host_command_ptr[i], ref) << i; + } +} + +// Tests behavior of waiting on signal event before command-buffer has executed +TEST_P(KernelCommandEventSyncTest, SignalWaitBeforeEnqueue) { + // Initialize data X with queue submission + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Initialize data Y with command-buffer command + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + cmd_buf_handle, device_ptrs[1], &patternY, sizeof(patternY), + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // Kernel command for SAXPY waiting on command and signal event + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + cmd_buf_handle, kernel, 1, &global_offset, &elements, nullptr, 1, + &sync_points[0], 1, &external_events[0], &sync_points[1], + &external_events[1], nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); + + // Event will be considered complete before first execution + ASSERT_SUCCESS(urEventWait(1, &external_events[1])); +} + +struct KernelCommandEventSyncUpdateTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = "saxpy_usm"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + ur_bool_t event_support = false; + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP, + sizeof(ur_bool_t), &event_support, nullptr)); + if (!event_support) { + GTEST_SKIP() << "External event sync is not 
supported by device."; + } + + for (auto &device_ptr : device_ptrs) { + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, + allocation_size, &device_ptr)); + ASSERT_NE(device_ptr, nullptr); + } + + // Index 0 is output + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, device_ptrs[2])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 2, nullptr, device_ptrs[0])); + // Index 3 is Y + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 3, nullptr, device_ptrs[1])); + } + + virtual void TearDown() override { + for (auto &device_ptr : device_ptrs) { + if (device_ptr) { + EXPECT_SUCCESS(urUSMFree(context, device_ptr)); + } + } + + for (auto &event : external_events) { + if (event) { + EXPECT_SUCCESS(urEventRelease(event)); + } + } + + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + // First two shared points are inputs to be tested, last is an output + // generated by test command. + std::array device_ptrs = {nullptr, nullptr, nullptr}; + std::array external_events = {nullptr, nullptr, + nullptr, nullptr}; + std::array sync_points = {0, 0}; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; + static constexpr size_t elements = 64; + static constexpr size_t global_offset = 0; + static constexpr size_t allocation_size = sizeof(uint32_t) * elements; + static constexpr size_t A = 2; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(KernelCommandEventSyncUpdateTest); + +// Tests updating the signal and wait event dependencies of the saxpy +// command in a command-buffer. 
+TEST_P(KernelCommandEventSyncUpdateTest, Basic) { + // Initialize data X with queue submission + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Initialize data Y with command-buffer command + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( + updatable_cmd_buf_handle, device_ptrs[1], &patternY, sizeof(patternY), + allocation_size, 0, nullptr, 0, nullptr, &sync_points[0], nullptr, + nullptr)); + + // SAXPY kernel command waiting on Y fill sync-point and X fill event + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, 1, &global_offset, &elements, nullptr, + 1, &sync_points[0], 1, &external_events[0], &sync_points[1], + &external_events[1], &command_handle)); + ASSERT_NE(command_handle, nullptr); + + // Command-buffer command that reads output to host + std::array host_command_ptr{}; + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + updatable_cmd_buf_handle, host_command_ptr.data(), device_ptrs[2], + allocation_size, 1, &sync_points[1], 0, nullptr, nullptr, nullptr, + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue command that reads output to host, waiting on kernel signal event + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[2], allocation_size, 1, + &external_events[1], nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queue)); + + for (size_t i = 0; i < elements; i++) { + auto ref = (patternX * A) + patternY; + ASSERT_EQ(host_command_ptr[i], ref); + ASSERT_EQ(host_enqueue_ptr[i], ref); + } + + // Reset output data + std::memset(host_command_ptr.data(), 0, allocation_size); + std::memset(host_enqueue_ptr.data(), 0, allocation_size); + + // Set data X to new value with queue submission +
patternX = 0xBEEF; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[2])); + + // Update kernel command wait event to wait on fill of new X value + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handle, 1, + &external_events[2])); + + // Get a new signal event for the kernel command + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handle, + &external_events[3])); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Read data back with a queue operation waiting on updated kernel command + // signal event. + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[2], allocation_size, 1, + &external_events[3], nullptr)); + + // Verify results + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + auto ref = (patternX * A) + patternY; + ASSERT_EQ(host_command_ptr[i], ref); + ASSERT_EQ(host_enqueue_ptr[i], ref); + } +} + +// Tests updating the wait events of a command with multiple wait events +TEST_P(KernelCommandEventSyncUpdateTest, TwoWaitEvents) { + // Initialize data X with queue submission + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Initialize data Y with queue submission + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[1], sizeof(patternY), + &patternY, allocation_size, 0, nullptr, + &external_events[1])); + + // Kernel command for SAXPY waiting on the X and Y fill events + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, 1, &global_offset, &elements, nullptr, + 0, nullptr, 2, &external_events[0], &sync_points[0], + &external_events[2], &command_handle)); + ASSERT_NE(command_handle, nullptr); + + // command-buffer command that reads output to
host + std::array host_command_ptr{}; + ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( + updatable_cmd_buf_handle, host_command_ptr.data(), device_ptrs[2], + allocation_size, 1, &sync_points[0], 0, nullptr, nullptr, nullptr, + nullptr)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Queue command that reads output to host, waiting on kernel signal event + std::array host_enqueue_ptr{}; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[2], allocation_size, 1, + &external_events[2], nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queue)); + + for (size_t i = 0; i < elements; i++) { + auto ref = (patternX * A) + patternY; + ASSERT_EQ(host_command_ptr[i], ref); + ASSERT_EQ(host_enqueue_ptr[i], ref); + } + + // Reset output data + std::memset(host_command_ptr.data(), 0, allocation_size); + std::memset(host_enqueue_ptr.data(), 0, allocation_size); + + // Set data X to new value with queue submission + patternX = 0xBEEF; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[3])); + + // Set data Y to new value with queue submission + patternY = 0xBAD; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[1], sizeof(patternY), + &patternY, allocation_size, 0, nullptr, + &external_events[4])); + + // Update kernel command wait events to wait on fills of new X and Y values + ASSERT_SUCCESS(urCommandBufferUpdateWaitEventsExp(command_handle, 2, + &external_events[3])); + + // Get a new signal event for the kernel command + ASSERT_SUCCESS(urCommandBufferUpdateSignalEventExp(command_handle, + &external_events[5])); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + // Read data back with a queue operation waiting on updated kernel command + // signal event.
+ ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_enqueue_ptr.data(), + device_ptrs[2], allocation_size, 1, + &external_events[5], nullptr)); + + // Verify results + ASSERT_SUCCESS(urQueueFinish(queue)); + for (size_t i = 0; i < elements; i++) { + auto ref = (patternX * A) + patternY; + ASSERT_EQ(host_command_ptr[i], ref); + ASSERT_EQ(host_enqueue_ptr[i], ref); + } +} + +// Tests the correct error is returned when a different number +// of wait events is passed during update. +TEST_P(KernelCommandEventSyncUpdateTest, InvalidWaitUpdate) { + // Initialize data X with queue submission + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Initialize data Y with queue submission + uint32_t patternY = 0xA; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[1], sizeof(patternY), + &patternY, allocation_size, 0, nullptr, + &external_events[1])); + + // Initialize data Z with queue submission + int32_t zero_pattern = 0; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[2], sizeof(zero_pattern), + &zero_pattern, allocation_size, 0, nullptr, + &external_events[2])); + + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, 1, &global_offset, &elements, nullptr, + 0, nullptr, 1, &external_events[0], nullptr, nullptr, &command_handle)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + // Increasing the number of wait events should be an error + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST, + urCommandBufferUpdateWaitEventsExp(command_handle, 2, + &external_events[1])); + + // Decreasing the number of wait events should be an error + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST, + urCommandBufferUpdateWaitEventsExp(command_handle, 0, nullptr)); +} + +// Tests the correct error is returned when trying to update the +// signal event of a command that was not created with one.
+TEST_P(KernelCommandEventSyncUpdateTest, InvalidSignalUpdate) { + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, 1, &global_offset, &elements, nullptr, + 0, nullptr, 0, nullptr, nullptr, nullptr, &command_handle)); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + + uint32_t patternX = 42; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptrs[0], sizeof(patternX), + &patternX, allocation_size, 0, nullptr, + &external_events[0])); + + // Updating the signal event of a command created without one is an error + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_OPERATION, + urCommandBufferUpdateSignalEventExp(command_handle, + &external_events[0])); +} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match index 7c222d70a6..6f5366b092 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match @@ -1,57 +1,89 @@ -BufferFillCommandTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.UpdateGlobalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.SeparateUpdateCalls/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.OverrideUpdate/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.OverrideArgList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMFillCommandTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMFillCommandTest.UpdateBeforeEnqueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultipleFillCommandTest.UpdateAllKernels/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_
-USMSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultiSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultiSaxpyKernelTest.UpdateWithoutBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Invalid/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferRetainExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferRetainExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferRetainCommandExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferRetainCommandExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.NotFinalizedCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.NotUpdatableCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-InvalidUpdateTest.GlobalLocalSizeMistach/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.ImplToUserDefinedLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.UserToImplDefinedLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendUSMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendUSMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendUSMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferCommandsTest.urCommandBufferAppendUSMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferAppendKernelLaunchExpTest.Basic/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 
-urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 +{{OPT}}BufferFillCommandTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}BufferFillCommandTest.UpdateGlobalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
+{{OPT}}BufferFillCommandTest.OverrideUpdate/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}BufferFillCommandTest.OverrideArgList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}USMFillCommandTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}BufferSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}USMSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}NDRangeUpdateTest.Update3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}NDRangeUpdateTest.Update2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}NDRangeUpdateTest.Update1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}NDRangeUpdateTest.Invalid/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferReleaseExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferReleaseExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferReleaseCommandExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
+{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferRetainExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferRetainExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferRetainCommandExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}InvalidUpdateTest.GlobalLocalSizeMistach/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}InvalidUpdateTest.ImplToUserDefinedLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}InvalidUpdateTest.UserToImplDefinedLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendUSMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendUSMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
+{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendMemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendUSMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferCommandsTest.urCommandBufferAppendUSMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferAppendKernelLaunchExpTest.Basic/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 +{{OPT}}urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 +{{OPT}}urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 +{{OPT}}urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 +{{OPT}}urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 +{{OPT}}urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 +{{OPT}}urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 +{{OPT}}urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 +{{OPT}}urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 
+{{OPT}}urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 +{{OPT}}urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 +{{OPT}}urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 +{{OPT}}urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 +{{OPT}}urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 +{{OPT}}KernelCommandEventSyncTest.Basic/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}KernelCommandEventSyncTest.InterCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}KernelCommandEventSyncUpdateTest.Basic/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}KernelCommandEventSyncUpdateTest.InvalidWaitUpdate/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}KernelCommandEventSyncUpdateTest.InvalidSignalUpdate/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.USMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.USMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.MemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.MemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.MemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
+{{OPT}}CommandEventSyncTest.MemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.MemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.MemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.MemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.USMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.USMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.MultipleEventCommands/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncTest.MultipleEventCommandsBetweenCommandBuffers/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.USMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.USMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.MemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.MemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.MemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.MemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.MemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.MemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.MemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.USMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
+{{OPT}}CommandEventSyncUpdateTest.USMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}CommandEventSyncUpdateTest.MultipleEventCommands/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index 2508f92fed..f0f1cf262c 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -26,3 +26,10 @@ {{OPT}}InvalidUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}urCommandBufferAppendKernelLaunchExpTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}KernelCommandEventSyncTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}KernelCommandEventSyncTest.InterCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/SYCL_NATIVE_CPU__{{.*}} +{{OPT}}KernelCommandEventSyncUpdateTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__X_{{.*}} +{{OPT}}KernelCommandEventSyncUpdateTest.InvalidWaitUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}KernelCommandEventSyncUpdateTest.InvalidSignalUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/exp_command_buffer/fill.cpp b/test/conformance/exp_command_buffer/fill.cpp index 2b9a27cf2a..278cc4578e 100644 --- a/test/conformance/exp_command_buffer/fill.cpp +++ b/test/conformance/exp_command_buffer/fill.cpp @@ -104,12 +104,12 @@ UUR_TEST_SUITE_P(urCommandBufferFillCommandsTest, testing::ValuesIn(test_cases), TEST_P(urCommandBufferFillCommandsTest, Buffer) { 
ASSERT_SUCCESS(urCommandBufferAppendMemBufferFillExp( cmd_buf_handle, buffer, pattern.data(), pattern_size, 0, size, 0, - nullptr, &sync_point)); + nullptr, 0, nullptr, &sync_point, nullptr, nullptr)); std::vector output(size, 1); ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadExp( - cmd_buf_handle, buffer, 0, size, output.data(), 1, &sync_point, - nullptr)); + cmd_buf_handle, buffer, 0, size, output.data(), 1, &sync_point, 0, + nullptr, nullptr, nullptr, nullptr)); ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); @@ -123,12 +123,12 @@ TEST_P(urCommandBufferFillCommandsTest, Buffer) { TEST_P(urCommandBufferFillCommandsTest, USM) { ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp( cmd_buf_handle, device_ptr, pattern.data(), pattern_size, size, 0, - nullptr, &sync_point)); + nullptr, 0, nullptr, &sync_point, nullptr, nullptr)); std::vector output(size, 1); ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp( - cmd_buf_handle, output.data(), device_ptr, size, 1, &sync_point, - nullptr)); + cmd_buf_handle, output.data(), device_ptr, size, 1, &sync_point, 0, + nullptr, nullptr, nullptr, nullptr)); ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 85457bea97..a24b0876b3 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -201,12 +201,14 @@ struct urCommandBufferCommandExpTest // Append 2 kernel commands to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - 
&global_size, &local_size, 0, nullptr, nullptr, &command_handle_2)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, + &command_handle_2)); ASSERT_NE(command_handle_2, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); diff --git a/test/conformance/exp_command_buffer/invalid_update.cpp b/test/conformance/exp_command_buffer/invalid_update.cpp index afcb279fa9..cfeea04036 100644 --- a/test/conformance/exp_command_buffer/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/invalid_update.cpp @@ -36,7 +36,8 @@ struct InvalidUpdateTest // Append kernel command to command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); } @@ -119,7 +120,8 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { ur_exp_command_buffer_command_handle_t test_command_handle = nullptr; EXPECT_SUCCESS(urCommandBufferAppendKernelLaunchExp( test_cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size, - &local_size, 0, nullptr, nullptr, &test_command_handle)); + &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, + &test_command_handle)); EXPECT_NE(test_command_handle, nullptr); EXPECT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle)); @@ -200,7 +202,8 @@ TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { ur_exp_command_buffer_command_handle_t second_command_handle = nullptr; ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, nullptr, 0, nullptr, nullptr, &second_command_handle)); + &global_size, nullptr, 0, nullptr, 0, nullptr, nullptr, nullptr, + &second_command_handle)); ASSERT_NE(second_command_handle, nullptr); 
EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp index 3c053fe4b9..8373b31ca5 100644 --- a/test/conformance/exp_command_buffer/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -35,7 +35,7 @@ struct NDRangeUpdateTest ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, global_offset.data(), global_size.data(), local_size.data(), 0, - nullptr, nullptr, &command_handle)); + nullptr, 0, nullptr, nullptr, nullptr, &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp index 606744cd86..0d078e05c2 100644 --- a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp @@ -37,7 +37,8 @@ struct USMFillCommandTest // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); @@ -233,8 +234,8 @@ struct USMMultipleFillCommandTest // Append kernel and store returned handle ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &elements, &local_size, 0, nullptr, nullptr, - &command_handles[k])); + &elements, &local_size, 0, nullptr, 0, nullptr, nullptr, + nullptr, &command_handles[k])); ASSERT_NE(command_handles[k], nullptr); } 
diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp index 0cb50cb3f1..d79104d03b 100644 --- a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp @@ -81,7 +81,8 @@ struct USMSaxpyKernelTest : USMSaxpyKernelTestBase { // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, nullptr, + &command_handle)); ASSERT_NE(command_handle, nullptr); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); @@ -181,8 +182,8 @@ struct USMMultiSaxpyKernelTest : USMSaxpyKernelTestBase { for (unsigned node = 0; node < nodes; node++) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, nullptr, - &command_handles[node])); + &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, + nullptr, &command_handles[node])); ASSERT_NE(command_handles[node], nullptr); } diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index 22f4ec6413..521bac0aa0 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -337,6 +337,9 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, printDeviceInfo( hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP); std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP); std::cout << prefix; printDeviceInfo(hDevice,