diff --git a/include/ur_api.h b/include/ur_api.h index 42012ce3b5..8579ff0326 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -196,6 +196,7 @@ typedef enum ur_function_t { UR_FUNCTION_ADAPTER_RETAIN = 179, ///< Enumerator for ::urAdapterRetain UR_FUNCTION_ADAPTER_GET_LAST_ERROR = 180, ///< Enumerator for ::urAdapterGetLastError UR_FUNCTION_ADAPTER_GET_INFO = 181, ///< Enumerator for ::urAdapterGetInfo + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 182, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp UR_FUNCTION_PROGRAM_BUILD_EXP = 197, ///< Enumerator for ::urProgramBuildExp UR_FUNCTION_PROGRAM_COMPILE_EXP = 198, ///< Enumerator for ::urProgramCompileExp UR_FUNCTION_PROGRAM_LINK_EXP = 199, ///< Enumerator for ::urProgramLinkExp @@ -215,6 +216,10 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213, ///< Enumerator for ::urCommandBufferAppendUSMAdviseExp UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214, ///< Enumerator for ::urEnqueueCooperativeKernelLaunchExp UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215, ///< Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp + UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP = 216, ///< Enumerator for ::urCommandBufferRetainCommandExp + UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP = 217, ///< Enumerator for ::urCommandBufferReleaseCommandExp + UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 218, ///< Enumerator for ::urCommandBufferGetInfoExp + UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 219, ///< Enumerator for ::urCommandBufferCommandGetInfoExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -224,48 +229,53 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t 
- UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - 
UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< 
::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + 
UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC = 0x1001, ///< ::ur_exp_command_buffer_update_kernel_launch_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC = 0x1002, ///< ::ur_exp_command_buffer_update_memobj_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC = 0x1003, ///< ::ur_exp_command_buffer_update_pointer_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC = 0x1004, ///< ::ur_exp_command_buffer_update_value_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC = 0x1005, ///< ::ur_exp_command_buffer_update_exec_info_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -483,6 +493,7 @@ typedef enum ur_result_t { UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000, ///< Invalid Command-Buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001, ///< Sync point is not valid for the command-buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002, ///< Sync point wait list is invalid + UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP 
= 0x1003, ///< Handle to command-buffer command is invalid UR_RESULT_ERROR_UNKNOWN = 0x7ffffffe, ///< Unknown or internal error /// @cond UR_RESULT_FORCE_UINT32 = 0x7fffffff @@ -1534,6 +1545,10 @@ typedef enum ur_device_info_t { ///< this composite device. UR_DEVICE_INFO_COMPOSITE_DEVICE = 117, ///< [::ur_device_handle_t] The composite device containing this component ///< device. + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of + ///< command-buffers. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel + ///< commands in a command-buffer. UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001, ///< [::ur_bool_t] returns true if the device supports the creation of @@ -7758,6 +7773,32 @@ urBindlessImagesSignalExternalSemaphoreExp( #if !defined(__GNUC__) #pragma region command buffer(experimental) #endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Command-buffer query information type +typedef enum ur_exp_command_buffer_info_t { + UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT = 0, ///< [uint32_t] Reference count of the command-buffer object. + ///< The reference count returned should be considered immediately stale. + ///< It is unsuitable for general use in applications. This feature is + ///< provided for identifying memory leaks. 
+ /// @cond + UR_EXP_COMMAND_BUFFER_INFO_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_command_buffer_info_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Command-buffer command query information type +typedef enum ur_exp_command_buffer_command_info_t { + UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT = 0, ///< [uint32_t] Reference count of the command-buffer command object. + ///< The reference count returned should be considered immediately stale. + ///< It is unsuitable for general use in applications. This feature is + ///< provided for identifying memory leaks. + /// @cond + UR_EXP_COMMAND_BUFFER_COMMAND_INFO_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_command_buffer_command_info_t; + /////////////////////////////////////////////////////////////////////////////// #ifndef UR_COMMAND_BUFFER_EXTENSION_STRING_EXP /// @brief The extension string which defines support for command-buffers which @@ -7771,9 +7812,92 @@ typedef struct ur_exp_command_buffer_desc_t { ur_structure_type_t stype; ///< [in] type of this structure, must be ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_bool_t isUpdatable; ///< [in] Commands in a finalized command-buffer can be updated. } ur_exp_command_buffer_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command memobj argument. +typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optional] Pointer to memory object properties.
+ ur_mem_handle_t hNewMemObjArg; ///< [in][optional] Handle of memory object to set at argument index. + +} ur_exp_command_buffer_update_memobj_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command pointer argument. +typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optional] Pointer to USM pointer properties. + const void *pNewPointerArg; ///< [in][optional] USM pointer to memory location holding the argument + ///< value to set at argument index. + +} ur_exp_command_buffer_update_pointer_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command value argument. +typedef struct ur_exp_command_buffer_update_value_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + uint32_t argSize; ///< [in] Argument size. + const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optional] Pointer to value properties. + const void *pNewValueArg; ///< [in][optional] Argument value representing matching kernel arg type to + ///< set at argument index. + +} ur_exp_command_buffer_update_value_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating kernel command execution info.
+typedef struct ur_exp_command_buffer_update_exec_info_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_kernel_exec_info_t propName; ///< [in] Name of execution attribute. + size_t propSize; ///< [in] Size of execution attribute. + const ur_kernel_exec_info_properties_t *pProperties; ///< [in][optional] Pointer to execution info properties. + const void *pNewExecInfo; ///< [in] Pointer to memory location holding the execution info value. + +} ur_exp_command_buffer_update_exec_info_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel launch command. +typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t numNewMemObjArgs; ///< [in] Length of pNewMemObjArgList. + uint32_t numNewPointerArgs; ///< [in] Length of pNewPointerArgList. + uint32_t numNewValueArgs; ///< [in] Length of pNewValueArgList. + uint32_t numNewExecInfos; ///< [in] Length of pNewExecInfoList. + uint32_t newWorkDim; ///< [in] Number of work dimensions in the kernel ND-range, from 1-3. + const ur_exp_command_buffer_update_memobj_arg_desc_t *pNewMemObjArgList; ///< [in][optional][range(0, numNewMemObjArgs)] An array describing the new + ///< kernel mem obj arguments for the command. + const ur_exp_command_buffer_update_pointer_arg_desc_t *pNewPointerArgList; ///< [in][optional][range(0, numNewPointerArgs)] An array describing the + ///< new kernel pointer arguments for the command. 
+ const ur_exp_command_buffer_update_value_arg_desc_t *pNewValueArgList; ///< [in][optional][range(0, numNewValueArgs)] An array describing the new + ///< kernel value arguments for the command. + const ur_exp_command_buffer_update_exec_info_desc_t *pNewExecInfoList; ///< [in][optional][range(0, numNewExecInfos)] An array describing the + ///< execution info objects for the command. + size_t *pNewGlobalWorkOffset; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned + ///< values that describe the offset used to calculate the global ID. + size_t *pNewGlobalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned + ///< values that describe the number of global work-items. + size_t *pNewLocalWorkSize; ///< [in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned + ///< values that describe the number of work-items that make up a + ///< work-group. If nullptr, the runtime implementation will choose the + ///< work-group size. + +} ur_exp_command_buffer_update_kernel_launch_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief A value that identifies a command inside of a command-buffer, used for /// defining dependencies between commands in the same command-buffer. @@ -7783,11 +7907,15 @@ typedef uint32_t ur_exp_command_buffer_sync_point_t; /// @brief Handle of Command-Buffer object typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of a Command-Buffer command +typedef struct ur_exp_command_buffer_command_handle_t_ *ur_exp_command_buffer_command_handle_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7805,10 +7933,10 @@ typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor - ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] pointer to Command-Buffer handle + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] Command-buffer descriptor. + ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] Pointer to command-buffer handle. ); /////////////////////////////////////////////////////////////////////////////// @@ -7826,7 +7954,7 @@ urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// @@ -7845,7 +7973,7 @@ urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object.
); /////////////////////////////////////////////////////////////////////////////// @@ -7864,11 +7992,11 @@ urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7895,19 +8023,20 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7932,17 +8061,17 @@ urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7981,7 +8110,7 @@ urCommandBufferAppendUSMFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8002,7 +8131,7 @@ urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -8010,11 +8139,11 @@ urCommandBufferAppendMemBufferCopyExp( size_t size, ///< [in] The number of bytes to be copied. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8036,18 +8165,18 @@ urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - const void *pSrc, ///< [in] pointer to host memory where data is to be written from. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + const void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8069,18 +8198,18 @@ urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. 
- ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being read. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8101,7 +8230,7 @@ urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t srcOrigin, ///< [in] Origin for the region of data to be copied from the source.
@@ -8113,11 +8242,11 @@ urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, ///< [in] Slice pitch of the destination memory. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8139,26 +8268,26 @@ urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. 
+ size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. - void *pSrc, ///< [in] pointer to host memory where data is to be written from. + void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8180,25 +8309,25 @@ urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. 
ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. + size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8234,7 +8363,7 @@ urCommandBufferAppendMemBufferFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -8275,7 +8404,7 @@ urCommandBufferAppendUSMPrefetchExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -8337,17 +8466,152 @@ urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_queue_handle_t hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_queue_handle_t hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Increment the command object's reference count. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t hCommand ///< [in] Handle of the command-buffer command. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Decrement the command object's reference count and delete the command +/// object if the reference count becomes zero. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t hCommand ///< [in] Handle of the command-buffer command. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command in a finalized command-buffer. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer object information. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t propName, ///< [in] the name of the command-buffer property to query + size_t propSize, ///< [in] size in bytes of the command-buffer property value + void *pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t *pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer command object information.
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t propName, ///< [in] the name of the command-buffer command property to query + size_t propSize, ///< [in] size in bytes of the command-buffer command property value + void *pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t *pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -10507,6 +10771,7 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_kernel_launch_exp_params_t; 
/////////////////////////////////////////////////////////////////////////////// @@ -10700,6 +10965,55 @@ typedef struct ur_command_buffer_enqueue_exp_params_t { ur_event_handle_t **pphEvent; } ur_command_buffer_enqueue_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferRetainCommandExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_retain_command_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; +} ur_command_buffer_retain_command_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferReleaseCommandExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_release_command_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; +} ur_command_buffer_release_command_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferUpdateKernelLaunchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_update_kernel_launch_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; + const ur_exp_command_buffer_update_kernel_launch_desc_t **ppUpdateKernelLaunch; +} ur_command_buffer_update_kernel_launch_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferGetInfoExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability 
to modify the parameter's value +typedef struct ur_command_buffer_get_info_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + ur_exp_command_buffer_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_command_buffer_get_info_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferCommandGetInfoExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_command_get_info_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; + ur_exp_command_buffer_command_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_command_buffer_command_get_info_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 6e1bf577f8..891d8bc7f4 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1854,7 +1854,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp @@ -2011,6 +2012,40 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferEnqueueExp_t)( const ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferRetainCommandExp +typedef 
ur_result_t(UR_APICALL *ur_pfnCommandBufferRetainCommandExp_t)( + ur_exp_command_buffer_command_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferReleaseCommandExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferReleaseCommandExp_t)( + ur_exp_command_buffer_command_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferUpdateKernelLaunchExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferUpdateKernelLaunchExp_t)( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferGetInfoExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferGetInfoExp_t)( + ur_exp_command_buffer_handle_t, + ur_exp_command_buffer_info_t, + size_t, + void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferCommandGetInfoExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferCommandGetInfoExp_t)( + ur_exp_command_buffer_command_handle_t, + ur_exp_command_buffer_command_info_t, + size_t, + void *, + size_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of CommandBufferExp functions pointers typedef struct ur_command_buffer_exp_dditable_t { @@ -2031,6 +2066,11 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferAppendUSMPrefetchExp_t pfnAppendUSMPrefetchExp; ur_pfnCommandBufferAppendUSMAdviseExp_t pfnAppendUSMAdviseExp; ur_pfnCommandBufferEnqueueExp_t pfnEnqueueExp; + ur_pfnCommandBufferRetainCommandExp_t pfnRetainCommandExp; + ur_pfnCommandBufferReleaseCommandExp_t pfnReleaseCommandExp; + ur_pfnCommandBufferUpdateKernelLaunchExp_t pfnUpdateKernelLaunchExp; + 
ur_pfnCommandBufferGetInfoExp_t pfnGetInfoExp; + ur_pfnCommandBufferCommandGetInfoExp_t pfnCommandGetInfoExp; } ur_command_buffer_exp_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/ur_print.h b/include/ur_print.h index c847341893..e1718e99f8 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -930,6 +930,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpInteropMemDesc(const struct ur_exp /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpInteropSemaphoreDesc(const struct ur_exp_interop_semaphore_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_info_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferInfo(enum ur_exp_command_buffer_info_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_command_info_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferCommandInfo(enum ur_exp_command_buffer_command_info_t value, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_command_buffer_desc_t struct /// @returns @@ -938,6 +954,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpInteropSemaphoreDesc(const struct /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferDesc(const struct ur_exp_command_buffer_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_update_memobj_arg_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateMemobjArgDesc(const struct ur_exp_command_buffer_update_memobj_arg_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_update_pointer_arg_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdatePointerArgDesc(const struct ur_exp_command_buffer_update_pointer_arg_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_update_value_arg_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateValueArgDesc(const struct ur_exp_command_buffer_update_value_arg_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_update_exec_info_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateExecInfoDesc(const struct ur_exp_command_buffer_update_exec_info_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print 
ur_exp_command_buffer_update_kernel_launch_desc_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateKernelLaunchDesc(const struct ur_exp_command_buffer_update_kernel_launch_desc_t params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_peer_info_t enum /// @returns @@ -2250,6 +2306,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferAppendUsmAdviseExpParams /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferEnqueueExpParams(const struct ur_command_buffer_enqueue_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_retain_command_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferRetainCommandExpParams(const struct ur_command_buffer_retain_command_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_release_command_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferReleaseCommandExpParams(const struct ur_command_buffer_release_command_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_update_kernel_launch_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - 
::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferUpdateKernelLaunchExpParams(const struct ur_command_buffer_update_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_get_info_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferGetInfoExpParams(const struct ur_command_buffer_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_command_get_info_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferCommandGetInfoExpParams(const struct ur_command_buffer_command_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_usm_p2p_enable_peer_access_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index cd147bc10e..b4c777b77d 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -58,6 +58,8 @@ template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; template @@ -199,6 +201,12 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printTagged(std::ostream &os, const void *ptr, 
ur_exp_command_buffer_info_t value, size_t size); + +template <> +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_command_info_t value, size_t size); + template <> inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_info_t value, size_t size); @@ -318,7 +326,14 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_addr_modes_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); +inline std::ostream &operator<<(std::ostream &os, ur_exp_command_buffer_info_t value); +inline std::ostream &operator<<(std::ostream &os, ur_exp_command_buffer_command_info_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_memobj_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_pointer_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_value_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_exec_info_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value); /////////////////////////////////////////////////////////////////////////////// @@ -822,6 +837,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t 
value) { case UR_FUNCTION_ADAPTER_GET_INFO: os << "UR_FUNCTION_ADAPTER_GET_INFO"; break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP"; + break; case UR_FUNCTION_PROGRAM_BUILD_EXP: os << "UR_FUNCTION_PROGRAM_BUILD_EXP"; break; @@ -879,6 +897,18 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t value) { case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP: os << "UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP"; break; + case UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP"; + break; default: os << "unknown enumerator"; break; @@ -999,6 +1029,21 @@ inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC"; + 
break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES"; break; @@ -1214,6 +1259,31 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: { + const ur_exp_command_buffer_update_kernel_launch_desc_t *pstruct = (const ur_exp_command_buffer_update_kernel_launch_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: { + const ur_exp_command_buffer_update_memobj_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_memobj_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: { + const ur_exp_command_buffer_update_pointer_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_pointer_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: { + const ur_exp_command_buffer_update_value_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_value_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: { + const ur_exp_command_buffer_update_exec_info_desc_t *pstruct = (const ur_exp_command_buffer_update_exec_info_desc_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: { const ur_exp_sampler_mip_properties_t *pstruct = (const ur_exp_sampler_mip_properties_t *)ptr; printPtr(os, pstruct); @@ -1472,6 +1542,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_result_t value) { case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP"; break; + case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP: + os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP"; + break; case 
UR_RESULT_ERROR_UNKNOWN: os << "UR_RESULT_ERROR_UNKNOWN"; break; @@ -2407,6 +2480,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_device_info_t value) { case UR_DEVICE_INFO_COMPOSITE_DEVICE: os << "UR_DEVICE_INFO_COMPOSITE_DEVICE"; break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP"; break; @@ -3843,6 +3922,30 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -9180,6 +9283,96 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_interop_se os << "}"; return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_exp_command_buffer_info_t value) { + switch (value) { + case 
UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + os << "UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_info_t enum value +template <> +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size) { + if (ptr == NULL) { + return printPtr(os, ptr); + } + + switch (value) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: { + const uint32_t *tptr = (const uint32_t *)ptr; + if (sizeof(uint32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + default: + os << "unknown enumerator"; + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_command_info_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, ur_exp_command_buffer_command_info_t value) { + switch (value) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: + os << "UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_command_buffer_command_info_t enum value +template <> +inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_command_info_t value, size_t size) { + if (ptr == NULL) { + return printPtr(os, ptr); + } + + switch (value) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: { + 
const uint32_t *tptr = (const uint32_t *)ptr; + if (sizeof(uint32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + default: + os << "unknown enumerator"; + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_command_buffer_desc_t type /// @returns @@ -9197,6 +9390,284 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu ur::details::printStruct(os, (params.pNext)); + os << ", "; + os << ".isUpdatable = "; + + os << (params.isUpdatable); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_memobj_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_memobj_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_memobj_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".hNewMemObjArg = "; + + ur::details::printPtr(os, + (params.hNewMemObjArg)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_pointer_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_pointer_arg_desc_t params) { + os << 
"(struct ur_exp_command_buffer_update_pointer_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pNewPointerArg = "; + + os << (params.pNewPointerArg); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_value_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_value_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_value_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".argSize = "; + + os << (params.argSize); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pNewValueArg = "; + + os << (params.pNewValueArg); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_exec_info_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_exec_info_desc_t params) { + os << "(struct ur_exp_command_buffer_update_exec_info_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".propName = "; + + os << (params.propName); + + os << ", "; + os << ".propSize = "; + + os << (params.propSize); + + os << ", "; + os << 
".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pNewExecInfo = "; + + os << (params.pNewExecInfo); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_kernel_launch_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_kernel_launch_desc_t params) { + os << "(struct ur_exp_command_buffer_update_kernel_launch_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".numNewMemObjArgs = "; + + os << (params.numNewMemObjArgs); + + os << ", "; + os << ".numNewPointerArgs = "; + + os << (params.numNewPointerArgs); + + os << ", "; + os << ".numNewValueArgs = "; + + os << (params.numNewValueArgs); + + os << ", "; + os << ".numNewExecInfos = "; + + os << (params.numNewExecInfos); + + os << ", "; + os << ".newWorkDim = "; + + os << (params.newWorkDim); + + os << ", "; + os << ".pNewMemObjArgList = {"; + for (size_t i = 0; (params.pNewMemObjArgList) != NULL && i < params.numNewMemObjArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewMemObjArgList))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewPointerArgList = {"; + for (size_t i = 0; (params.pNewPointerArgList) != NULL && i < params.numNewPointerArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewPointerArgList))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewValueArgList = {"; + for (size_t i = 0; (params.pNewValueArgList) != NULL && i < params.numNewValueArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewValueArgList))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewExecInfoList = {"; + for (size_t i = 0; (params.pNewExecInfoList) != NULL && i < params.numNewExecInfos; ++i) { + if (i != 0) { + os << ", "; + 
} + + os << ((params.pNewExecInfoList))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewGlobalWorkOffset = {"; + for (size_t i = 0; (params.pNewGlobalWorkOffset) != NULL && i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewGlobalWorkOffset))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewGlobalWorkSize = {"; + for (size_t i = 0; (params.pNewGlobalWorkSize) != NULL && i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewGlobalWorkSize))[i]; + } + os << "}"; + + os << ", "; + os << ".pNewLocalWorkSize = {"; + for (size_t i = 0; (params.pNewLocalWorkSize) != NULL && i < params.newWorkDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pNewLocalWorkSize))[i]; + } + os << "}"; + os << "}"; return os; } @@ -14600,6 +15071,12 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -15299,6 +15776,122 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_retain_command_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_retain_command_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_release_command_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_release_command_exp_params_t *params) { + + os << ".hCommand = "; + + 
ur::details::printPtr(os, + *(params->phCommand)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_update_kernel_launch_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_update_kernel_launch_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".pUpdateKernelLaunch = "; + + ur::details::printPtr(os, + *(params->ppUpdateKernelLaunch)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_get_info_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_get_info_exp_params_t *params) { + + os << ".hCommandBuffer = "; + + ur::details::printPtr(os, + *(params->phCommandBuffer)); + + os << ", "; + os << ".propName = "; + + os << *(params->ppropName); + + os << ", "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_command_get_info_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_command_get_info_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".propName = "; + + os << *(params->ppropName); + + os << ", "; + os << ".propSize 
= "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t type /// @returns @@ -16433,6 +17026,21 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { os << (const struct ur_command_buffer_enqueue_exp_params_t *)params; } break; + case UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP: { + os << (const struct ur_command_buffer_retain_command_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP: { + os << (const struct ur_command_buffer_release_command_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: { + os << (const struct ur_command_buffer_update_kernel_launch_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP: { + os << (const struct ur_command_buffer_get_info_exp_params_t *)params; + } break; + case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP: { + os << (const struct ur_command_buffer_command_get_info_exp_params_t *)params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..0143b72c77 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -57,24 +57,29 @@ returned list of supported extensions. 
${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, 0, nullptr, &returnedSize); - // Retrieve extension string + // Retrieve extension string std::unique_ptr<char[]> returnedExtensions(new char[returnedSize]); - ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, + ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, returnedExtensions.get(), nullptr); - + std::string_view ExtensionsString(returnedExtensions.get()); - bool CmdBufferSupport = + bool CmdBufferSupport = ExtensionsString.find(${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP) != std::string::npos; +.. note:: + The ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP device info query exists to + serve the same purpose as ${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP. + Command-Buffer Creation -------------------------------------------------------------------------------- Command-Buffers are tied to a specific ${x}_context_handle_t and ${x}_device_handle_t. ${x}CommandBufferCreateExp optionally takes a descriptor to provide additional properties for how the command-buffer should be -constructed. There are currently no unique members defined for -${x}_exp_command_buffer_desc_t, however they may be added in the future. +constructed. The only unique member defined in ${x}_exp_command_buffer_desc_t +is ``isUpdatable``, which should be set to ``true`` to support :ref:`updating +command-buffer commands`. Command-buffers are reference counted and can be retained and released by calling ${x}CommandBufferRetainExp and ${x}CommandBufferReleaseExp respectively. @@ -89,6 +94,11 @@ However, they differ in that they take a command-buffer handle instead of a queue handle, and the dependencies and return parameters are sync-points instead of event handles. +The entry-point for appending a kernel launch command also returns an optional +handle to the command being appended. 
This handle can be used to update the +command configuration between command-buffer executions, see the section on +:ref:`updating command-buffer commands`. + Currently only the following commands are supported: * ${x}CommandBufferAppendKernelLaunchExp @@ -103,9 +113,9 @@ Currently only the following commands are supported: * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp - + It is planned to eventually support any command type from the Core API which can -actually be appended to the equiavalent adapter native constructs. +actually be appended to the equivalent adapter native constructs. Sync-Points -------------------------------------------------------------------------------- @@ -122,15 +132,15 @@ were obtained from. // Append a memcpy with no sync-point dependencies ${x}_exp_command_buffer_sync_point_t syncPoint; - ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, + ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, nullptr, &syncPoint); - + // Append a kernel launch with syncPoint as a dependency, ignore returned // sync-point - ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 1, &syncPoint, - nullptr); + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 1, &syncPoint, + nullptr, nullptr); Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -147,6 +157,82 @@ enqueued or executed simultaneously, and submissions may be serialized. 
${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, &executionEvent); +Updating Command-Buffer Commands +-------------------------------------------------------------------------------- + +An adapter implementing the command-buffer experimental feature can optionally +support updating the configuration of kernel commands recorded to a +command-buffer. Support for this is reported by returning true in the +${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. + +Updating kernel commands is done by passing the new kernel configuration +to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of +the kernel command to update. Configurations that can be changed are the +parameters to the kernel and the execution ND-Range. + +.. parsed-literal:: + + // Create a command-buffer with update enabled. + ${x}_exp_command_buffer_desc_t desc { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, + nullptr, + true // isUpdatable + }; + ${x}_exp_command_buffer_handle_t hCommandBuffer; + ${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer); + + // Append a kernel command which has two buffer parameters, an input + // and an output. 
+ ${x}_exp_command_buffer_command_handle_t hCommand; + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 0, nullptr, + nullptr, &hCommand); + + // Close the command-buffer before updating + ${x}CommandBufferFinalizeExp(hCommandBuffer); + + // Define kernel argument at index 0 to be a new input buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newInputArg { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + newInputBuffer, // hNewMemObjArg + }; + + // Define kernel argument at index 1 to be a new output buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newOutputArg { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + nullptr, // pProperties + newOutputBuffer, // hNewMemObjArg + }; + + // Define the new configuration of the kernel command + ${x}_exp_command_buffer_update_memobj_arg_desc_t updatedArgs[2] = {newInputArg, newOutputArg}; + ${x}_exp_command_buffer_update_kernel_launch_desc_t update { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + updatedArgs, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Perform the update + ${x}CommandBufferUpdateKernelLaunchExp(hCommand, &update); + + API -------------------------------------------------------------------------------- @@ -156,12 +242,21 @@ Macros Enums ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}_device_info_t + * ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP + * 
${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP * ${x}_result_t * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP + * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP * ${x}_structure_type_t * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC * ${x}_command_t * ${X}_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP * ${x}_function_t @@ -182,15 +277,23 @@ Enums * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP - - + * ${X}_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP +* ${x}_exp_command_buffer_info_t + * ${X}_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT +* ${x}_exp_command_buffer_command_info_t + * ${X}_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT Types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_exp_command_buffer_desc_t +* ${x}_exp_command_buffer_update_kernel_launch_desc_t +* ${x}_exp_command_buffer_update_memobj_arg_desc_t +* ${x}_exp_command_buffer_update_pointer_arg_desc_t +* ${x}_exp_command_buffer_update_value_arg_desc_t +* ${x}_exp_command_buffer_update_exec_info_desc_t * ${x}_exp_command_buffer_sync_point_t * ${x}_exp_command_buffer_handle_t - +* ${x}_exp_command_buffer_command_handle_t Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -211,6 +314,11 @@ Functions * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp +* ${x}CommandBufferRetainCommandExp +* 
${x}CommandBufferReleaseCommandExp +* ${x}CommandBufferUpdateKernelLaunchExp +* ${x}CommandBufferGetInfoExp +* ${x}CommandBufferCommandGetInfoExp Changelog -------------------------------------------------------------------------------- @@ -227,6 +335,8 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ +| 1.4 | Add function definitions for kernel command update | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- @@ -234,3 +344,4 @@ Contributors * Ben Tracy `ben.tracy@codeplay.com <ben.tracy@codeplay.com>`_ * Ewan Crawford `ewan@codeplay.com <ewan@codeplay.com>`_ * Maxime France-Pillois `maxime.francepillois@codeplay.com <maxime.francepillois@codeplay.com>`_ +* Aaron Greig `aaron.greig@codeplay.com <aaron.greig@codeplay.com>`_ diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 7d1b686aab..d2292ceb22 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -14,6 +14,19 @@ ordinal: "99" --- #-------------------------------------------------------------------------- type: enum extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support command-buffers." +name: $x_device_info_t +etors: + - name: COMMAND_BUFFER_SUPPORT_EXP + value: "0x1000" + desc: "[$x_bool_t] Returns true if the device supports the use of command-buffers." + - name: COMMAND_BUFFER_UPDATE_SUPPORT_EXP + value: "0x1001" + desc: "[$x_bool_t] Returns true if the device supports updating the kernel commands in a command-buffer." +--- #-------------------------------------------------------------------------- +type: enum +extend: true desc: "Experimental Command Buffer result type enums." 
name: $x_result_t etors: @@ -26,6 +39,9 @@ etors: - name: ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP value: "0x1002" desc: "Sync point wait list is invalid" + - name: ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + value: "0x1003" + desc: "Handle to command-buffer command is invalid" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -35,6 +51,21 @@ etors: - name: EXP_COMMAND_BUFFER_DESC desc: $x_exp_command_buffer_desc_t value: "0x1000" + - name: EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + desc: $x_exp_command_buffer_update_kernel_launch_desc_t + value: "0x1001" + - name: EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + desc: $x_exp_command_buffer_update_memobj_arg_desc_t + value: "0x1002" + - name: EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + desc: $x_exp_command_buffer_update_pointer_arg_desc_t + value: "0x1003" + - name: EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + desc: $x_exp_command_buffer_update_value_arg_desc_t + value: "0x1004" + - name: EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + desc: $x_exp_command_buffer_update_exec_info_desc_t + value: "0x1005" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -45,6 +76,30 @@ etors: desc: Event created by $xCommandBufferEnqueueExp value: "0x1000" --- #-------------------------------------------------------------------------- +type: enum +desc: "Command-buffer query information type" +class: $xCommandBuffer +name: $x_exp_command_buffer_info_t +typed_etors: True +etors: + - name: REFERENCE_COUNT + desc: | + [uint32_t] Reference count of the command-buffer object. + The reference count returned should be considered immediately stale. + It is unsuitable for general use in applications. This feature is provided for identifying memory leaks. 
+--- #-------------------------------------------------------------------------- +type: enum +desc: "Command-buffer command query information type" +class: $xCommandBuffer +name: $x_exp_command_buffer_command_info_t +typed_etors: True +etors: + - name: REFERENCE_COUNT + desc: | + [uint32_t] Reference count of the command-buffer command object. + The reference count returned should be considered immediately stale. + It is unsuitable for general use in applications. This feature is provided for identifying memory leaks. +--- #-------------------------------------------------------------------------- type: macro desc: "The extension string which defines support for command-buffers which is returned when querying device extensions." name: $X_COMMAND_BUFFER_EXTENSION_STRING_EXP @@ -54,7 +109,118 @@ type: struct desc: "Command-Buffer Descriptor Type" name: $x_exp_command_buffer_desc_t base: $x_base_desc_t -members: [] +members: + - type: $x_bool_t + name: isUpdatable + desc: "[in] Commands in a finalized command-buffer can be updated." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command memobj argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_memobj_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: "const ur_kernel_arg_mem_obj_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to memory object properties." + - type: $x_mem_handle_t + name: hNewMemObjArg + desc: "[in][optional] Handle of memory object to set at argument index." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command pointer argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_pointer_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." 
+ - type: "const ur_kernel_arg_pointer_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to USM pointer properties." + - type: "const void *" + name: pNewPointerArg + desc: "[in][optional] USM pointer to memory location holding the argument value to set at argument index." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command value argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_value_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: uint32_t + name: argSize + desc: "[in] Argument size." + - type: "const ur_kernel_arg_value_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to value properties." + - type: "const void *" + name: pNewValueArg + desc: "[in][optional] Argument value representing matching kernel arg type to set at argument index." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating kernel command execution info." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_exec_info_desc_t +members: + - type: ur_kernel_exec_info_t + name: propName + desc: "[in] Name of execution attribute." + - type: size_t + name: propSize + desc: "[in] Size of execution attribute." + - type: "const ur_kernel_exec_info_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to execution info properties." + - type: "const void *" + name: pNewExecInfo + desc: "[in] Pointer to memory location holding the execution info value." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel launch command." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_kernel_launch_desc_t +members: + - type: uint32_t + name: numNewMemObjArgs + desc: "[in] Length of pNewMemObjArgList."
+ - type: uint32_t + name: numNewPointerArgs + desc: "[in] Length of pNewPointerArgList." + - type: uint32_t + name: numNewValueArgs + desc: "[in] Length of pNewValueArgList." + - type: uint32_t + name: numNewExecInfos + desc: "[in] Length of pNewExecInfoList." + - type: uint32_t + name: newWorkDim + desc: "[in] Number of work dimensions in the kernel ND-range, from 1-3." + - type: "const $x_exp_command_buffer_update_memobj_arg_desc_t*" + name: pNewMemObjArgList + desc: "[in][optional][range(0, numNewMemObjArgs)] An array describing the new kernel mem obj arguments for the command." + - type: "const $x_exp_command_buffer_update_pointer_arg_desc_t*" + name: pNewPointerArgList + desc: "[in][optional][range(0, numNewPointerArgs)] An array describing the new kernel pointer arguments for the command." + - type: "const $x_exp_command_buffer_update_value_arg_desc_t*" + name: pNewValueArgList + desc: "[in][optional][range(0, numNewValueArgs)] An array describing the new kernel value arguments for the command." + - type: "const $x_exp_command_buffer_update_exec_info_desc_t*" + name: pNewExecInfoList + desc: "[in][optional][range(0, numNewExecInfos)] An array describing the execution info objects for the command." + - type: "size_t*" + name: pNewGlobalWorkOffset + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the offset used to calculate the global ID." + - type: "size_t*" + name: pNewGlobalWorkSize + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of global work-items." + - type: "size_t*" + name: pNewLocalWorkSize + desc: "[in][optional][range(0, newWorkDim)] Array of newWorkDim unsigned values that describe the number of work-items that make up a work-group. If nullptr, the runtime implementation will choose the work-group size." 
--- #-------------------------------------------------------------------------- type: typedef desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer." @@ -67,26 +233,31 @@ desc: "Handle of Command-Buffer object" class: $xCommandBuffer name: "$x_exp_command_buffer_handle_t" --- #-------------------------------------------------------------------------- +type: handle +desc: "Handle of a Command-Buffer command" +class: $xCommandBuffer +name: "$x_exp_command_buffer_command_handle_t" +--- #-------------------------------------------------------------------------- type: function desc: "Create a Command-Buffer object" class: $xCommandBuffer name: CreateExp decl: static details: - - "Create a command-buffer object" + - "Create a command-buffer object." params: - type: $x_context_handle_t name: hContext - desc: "[in] handle of the context object" + desc: "[in] Handle of the context object." - type: $x_device_handle_t name: hDevice - desc: "[in] handle of the device object" + desc: "[in] Handle of the device object." - type: "const $x_exp_command_buffer_desc_t*" name: pCommandBufferDesc - desc: "[in][optional] CommandBuffer descriptor" + desc: "[in][optional] Command-buffer descriptor." - type: "$x_exp_command_buffer_handle_t*" name: phCommandBuffer - desc: "[out] pointer to Command-Buffer handle" + desc: "[out] Pointer to command-buffer handle." returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE @@ -100,7 +271,7 @@ name: RetainExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object."
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -113,7 +284,7 @@ name: ReleaseExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -126,26 +297,26 @@ name: FinalizeExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a kernel execution command to a command-buffer object" +desc: "Append a kernel execution command to a command-buffer object." class: $xCommandBuffer name: AppendKernelLaunchExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." - type: $x_kernel_handle_t name: hKernel - desc: "[in] kernel to append" + desc: "[in] Kernel to append." - type: uint32_t name: workDim - desc: "[in] dimension of the kernel execution" + desc: "[in] Dimension of the kernel execution." - type: "const size_t*" name: pGlobalWorkOffset desc: "[in] Offset to use when executing kernel." @@ -163,7 +334,10 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_KERNEL @@ -178,13 +352,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM memcpy command to a command-buffer object" +desc: "Append a USM memcpy command to a command-buffer object." class: $xCommandBuffer name: AppendUSMMemcpyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: "void*" name: pDst desc: "[in] Location the data will be copied to." @@ -193,7 +367,7 @@ params: desc: "[in] The data to be copied." - type: "size_t" name: size - desc: "[in] The number of bytes to copy" + desc: "[in] The number of bytes to copy." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -202,7 +376,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_SIZE: @@ -217,7 +391,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM fill command to a command-buffer object" +desc: "Append a USM fill command to a command-buffer object." class: $xCommandBuffer name: AppendUSMFillExp params: @@ -262,13 +436,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory copy command to a command-buffer object" +desc: "Append a memory copy command to a command-buffer object." 
class: $xCommandBuffer name: AppendMemBufferCopyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -292,7 +466,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -304,25 +478,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory write command to a command-buffer object" +desc: "Append a memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "const void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." 
@@ -331,7 +505,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -343,25 +517,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory read command to a command-buffer object" +desc: "Append a memory read command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferReadExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being read." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be written to." + desc: "[in] Pointer to host memory where data is to be written to." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -370,7 +544,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command."
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -382,13 +556,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory copy command to a command-buffer object" +desc: "Append a rectangular memory copy command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferCopyRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -424,7 +598,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -436,16 +610,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory write command to a command-buffer object" +desc: "Append a rectangular memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -457,19 +631,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." 
- type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being written." + desc: "[in] Length of each 2D slice in bytes in the buffer object being written." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pSrc." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pSrc." - type: "void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -478,7 +652,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -490,16 +664,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory read command to a command-buffer object" +desc: "Append a rectangular memory read command to a command-buffer object." 
class: $xCommandBuffer name: AppendMemBufferReadRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -511,19 +685,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." - type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being read." + desc: "[in] Length of each 2D slice in bytes in the buffer object being read." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pDst." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pDst." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be read into." + desc: "[in] Pointer to host memory where data is to be read into." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -532,7 +706,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." 
returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -544,7 +718,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory fill command to a command-buffer object" +desc: "Append a memory fill command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferFillExp params: @@ -588,12 +762,12 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Prefetch command to a command-buffer object" +desc: "Append a USM Prefetch command to a command-buffer object." class: $xCommandBuffer name: AppendUSMPrefetchExp details: - - "Prefetching may not be supported for all devices or allocation types. If memory prefetching - is not supported, the prefetch hint will be ignored." + - "Prefetching may not be supported for all devices or allocation types. If + memory prefetching is not supported, the prefetch hint will be ignored." params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -630,12 +804,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Advise command to a command-buffer object" +desc: "Append a USM Advise command to a command-buffer object." class: $xCommandBuffer name: AppendUSMAdviseExp details: - - "Not all memory advice hints may be supported for all devices or allocation types. - If a memory advice hint is not supported, it will be ignored." + - "Not all memory advice hints may be supported for all devices or + allocation types. If a memory advice hint is not supported, it will be + ignored." 
params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -678,18 +853,18 @@ name: EnqueueExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_queue_handle_t name: hQueue - desc: "[in] the queue to submit this command-buffer for execution." + desc: "[in] The queue to submit this command-buffer for execution." - type: uint32_t name: numEventsInWaitList - desc: "[in] size of the event wait list" + desc: "[in] Size of the event wait list." - type: "const $x_event_handle_t*" name: phEventWaitList desc: | [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command-buffer execution. - If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. - type: $x_event_handle_t* name: phEvent desc: | @@ -704,3 +879,124 @@ returns: - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Increment the command object's reference count." +class: $xCommandBuffer +name: RetainCommandExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer command." +returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Decrement the command object's reference count and delete the command object if the reference count becomes zero." 
+class: $xCommandBuffer +name: ReleaseCommandExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer command." +returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Update a kernel launch command in a finalized command-buffer." +class: $xCommandBuffer +name: UpdateKernelLaunchExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer kernel command to update." + - type: "const $x_exp_command_buffer_update_kernel_launch_desc_t*" + name: pUpdateKernelLaunch + desc: "[in] Struct defining how the kernel command is to be updated." + +returns: + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If update functionality is not supported by the device." + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." + - "If the command-buffer `hCommand` belongs to has not been finalized." + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE + - $X_RESULT_ERROR_INVALID_ENUMERATION + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Get command-buffer object information." 
+class: $xCommandBuffer +name: GetInfoExp +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object" + - type: $x_exp_command_buffer_info_t + name: propName + desc: "[in] the name of the command-buffer property to query" + - type: size_t + name: propSize + desc: "[in] size in bytes of the command-buffer property value" + - type: void* + name: pPropValue + desc: "[out][optional][typename(propName, propSize)] value of the command-buffer property" + - type: size_t* + name: pPropSizeRet + desc: "[out][optional] bytes returned in command-buffer property" +returns: + - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + - "If `propName` is not supported by the adapter." + - $X_RESULT_ERROR_INVALID_SIZE: + - "`propSize == 0 && pPropValue != NULL`" + - "If `propSize` is less than the real number of bytes needed to return the info." + - $X_RESULT_ERROR_INVALID_NULL_POINTER: + - "`propSize != 0 && pPropValue == NULL`" + - "`pPropValue == NULL && pPropSizeRet == NULL`" + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Get command-buffer command object information."
+class: $xCommandBuffer +name: CommandGetInfoExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] handle of the command-buffer command object" + - type: $x_exp_command_buffer_command_info_t + name: propName + desc: "[in] the name of the command-buffer command property to query" + - type: size_t + name: propSize + desc: "[in] size in bytes of the command-buffer command property value" + - type: void* + name: pPropValue + desc: "[out][optional][typename(propName, propSize)] value of the command-buffer command property" + - type: size_t* + name: pPropSizeRet + desc: "[out][optional] bytes returned in command-buffer command property" +returns: + - $X_RESULT_ERROR_UNSUPPORTED_ENUMERATION: + - "If `propName` is not supported by the adapter." + - $X_RESULT_ERROR_INVALID_SIZE: + - "`propSize == 0 && pPropValue != NULL`" + - "If `propSize` is less than the real number of bytes needed to return the info." + - $X_RESULT_ERROR_INVALID_NULL_POINTER: + - "`propSize != 0 && pPropValue == NULL`" + - "`pPropValue == NULL && pPropSizeRet == NULL`" + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 6195cd4980..363531580f 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -502,6 +502,9 @@ etors: - name: ADAPTER_GET_INFO desc: Enumerator for $xAdapterGetInfo value: '181' +- name: COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP + desc: Enumerator for $xCommandBufferUpdateKernelLaunchExp + value: '182' - name: PROGRAM_BUILD_EXP desc: Enumerator for $xProgramBuildExp value: '197' @@ -559,6 +562,18 @@ etors: - name: KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP desc: Enumerator for $xKernelSuggestMaxCooperativeGroupCountExp value: '215' +- name: COMMAND_BUFFER_RETAIN_COMMAND_EXP + desc: Enumerator for $xCommandBufferRetainCommandExp + value: '216' +- name: 
COMMAND_BUFFER_RELEASE_COMMAND_EXP + desc: Enumerator for $xCommandBufferReleaseCommandExp + value: '217' +- name: COMMAND_BUFFER_GET_INFO_EXP + desc: Enumerator for $xCommandBufferGetInfoExp + value: '218' +- name: COMMAND_BUFFER_COMMAND_GET_INFO_EXP + desc: Enumerator for $xCommandBufferCommandGetInfoExp + value: '219' --- type: enum desc: Defines structure types diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index a65530a1f1..3f7970df53 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -19,12 +19,38 @@ #include +namespace { +ur_result_t +commandBufferReleaseInternal(ur_exp_command_buffer_handle_t CommandBuffer) { + if (CommandBuffer->decrementInternalReferenceCount() != 0) { + return UR_RESULT_SUCCESS; + } + + delete CommandBuffer; + return UR_RESULT_SUCCESS; +} + +ur_result_t +commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { + if (Command->decrementInternalReferenceCount() != 0) { + return UR_RESULT_SUCCESS; + } + + // Decrement parent command-buffer internal ref count + commandBufferReleaseInternal(Command->CommandBuffer); + + delete Command; + return UR_RESULT_SUCCESS; +} +} // end anonymous namespace + ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( - ur_context_handle_t hContext, ur_device_handle_t hDevice) - : Context(hContext), Device(hDevice), CudaGraph{nullptr}, - CudaGraphExec{nullptr}, RefCount{1}, NextSyncPoint{0} { - urContextRetain(hContext); - urDeviceRetain(hDevice); + ur_context_handle_t Context, ur_device_handle_t Device, bool IsUpdatable) + : Context(Context), Device(Device), + IsUpdatable(IsUpdatable), CudaGraph{nullptr}, CudaGraphExec{nullptr}, + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { + urContextRetain(Context); + urDeviceRetain(Device); } /// The ur_exp_command_buffer_handle_t_ destructor releases @@ -43,6 +69,33 @@ 
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { cuGraphExecDestroy(CudaGraphExec); } +ur_exp_command_buffer_command_handle_t_:: + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, + std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, + const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr) + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), + WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { + CommandBuffer->incrementInternalReferenceCount(); + + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize); + std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize); + // Local work size may be nullptr + if (LocalWorkSizePtr) { + std::memcpy(LocalWorkSize, LocalWorkSizePtr, CopySize); + } else { + std::memset(LocalWorkSize, 0, sizeof(size_t) * 3); + } + + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); + std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); + } +} + /// Helper function for finding the Cuda Nodes associated with the /// commands in a command-buffer, each event is pointed to by a sync-point in /// the wait list. @@ -136,7 +189,7 @@ static ur_result_t enqueueCommandBufferFillHelper( // Get sync point and register the cuNode with it. *SyncPoint = - CommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + CommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } else { // CUDA has no memset functions that allow setting values more than 4 @@ -174,7 +227,7 @@ static ur_result_t enqueueCommandBufferFillHelper( CommandBuffer->Device->getContext())); // Get sync point and register the cuNode with it. 
- *SyncPoint = CommandBuffer->AddSyncPoint( + *SyncPoint = CommandBuffer->addSyncPoint( std::make_shared(GraphNode)); } } @@ -188,10 +241,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ur_exp_command_buffer_handle_t *phCommandBuffer) { - (void)pCommandBufferDesc; + + const bool IsUpdatable = + pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; try { - *phCommandBuffer = new ur_exp_command_buffer_handle_t_(hContext, hDevice); + *phCommandBuffer = + new ur_exp_command_buffer_handle_t_(hContext, hDevice, IsUpdatable); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -209,17 +265,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - hCommandBuffer->incrementReferenceCount(); + hCommandBuffer->incrementInternalReferenceCount(); + hCommandBuffer->incrementExternalReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - if (hCommandBuffer->decrementReferenceCount() != 0) - return UR_RESULT_SUCCESS; + if (hCommandBuffer->decrementExternalReferenceCount() == 0) { + // External ref count has reached zero, internal release of created + // commands. 
+ for (auto Command : hCommandBuffer->CommandHandles) { + commandHandleReleaseInternal(Command); + } + } - delete hCommandBuffer; - return UR_RESULT_SUCCESS; + return commandBufferReleaseInternal(hCommandBuffer); } UR_APIEXPORT ur_result_t UR_APICALL @@ -250,7 +311,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand) { // Preconditions UR_ASSERT(hCommandBuffer->Context == hKernel->getContext(), UR_RESULT_ERROR_INVALID_KERNEL); @@ -277,7 +339,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( DepsList.data(), DepsList.size())); // Get sync point and register the cuNode with it. - *pSyncPoint = hCommandBuffer->AddSyncPoint( + *pSyncPoint = hCommandBuffer->addSyncPoint( std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; @@ -324,8 +386,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel->clearLocalSize(); // Get sync point and register the cuNode with it. 
- *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + auto NodeSP = std::make_shared(GraphNode); + if (pSyncPoint) { + *pSyncPoint = hCommandBuffer->addSyncPoint(NodeSP); + } + + auto NewCommand = new ur_exp_command_buffer_command_handle_t_{ + hCommandBuffer, hKernel, NodeSP, NodeParams, + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize}; + + NewCommand->incrementInternalReferenceCount(); + hCommandBuffer->CommandHandles.push_back(NewCommand); + + if (phCommand) { + *phCommand = NewCommand; + } + } catch (ur_result_t Err) { Result = Err; } @@ -359,7 +435,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -403,7 +479,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -444,7 +520,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -482,7 +558,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( // Get sync point and register the cuNode with it. 
*pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -519,7 +595,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -561,7 +637,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -603,7 +679,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); } catch (ur_result_t Err) { Result = Err; } @@ -633,7 +709,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( // Get sync point and register the cuNode with it. *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); setErrorMessage("Prefetch hint ignored and replaced with empty node as " "prefetch is not supported by CUDA Graph backend", @@ -668,7 +744,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( // Get sync point and register the cuNode with it. 
*pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + hCommandBuffer->addSyncPoint(std::make_shared(GraphNode)); setErrorMessage("Memory advice ignored and replaced with empty node as " "memory advice is not supported by CUDA Graph backend", @@ -762,3 +838,190 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return Result; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t hCommand) { + hCommand->incrementExternalReferenceCount(); + hCommand->incrementInternalReferenceCount(); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t hCommand) { + hCommand->decrementExternalReferenceCount(); + return commandHandleReleaseInternal(hCommand); +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + // Update requires command-buffer to be finalized + ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; + if (!CommandBuffer->CudaGraphExec) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Update requires command-buffer to be created with update enabled + if (!CommandBuffer->IsUpdatable) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Kernel corresponding to the command to update + ur_kernel_handle_t Kernel = hCommand->Kernel; + + // Update pointer arguments to the kernel + uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = + pUpdateKernelLaunch->pNewPointerArgList; + for (uint32_t i = 0; i < NumPointerArgs; i++) { + const auto &PointerArgDesc = ArgPointerList[i]; + uint32_t ArgIndex = PointerArgDesc.argIndex; + const void *ArgValue = PointerArgDesc.pNewPointerArg; + + ur_result_t Result = 
UR_RESULT_SUCCESS; + try { + Kernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Update memobj arguments to the kernel + uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = + pUpdateKernelLaunch->pNewMemObjArgList; + for (uint32_t i = 0; i < NumMemobjArgs; i++) { + const auto &MemobjArgDesc = ArgMemobjList[i]; + uint32_t ArgIndex = MemobjArgDesc.argIndex; + ur_mem_handle_t ArgValue = MemobjArgDesc.hNewMemObjArg; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + if (ArgValue == nullptr) { + Kernel->setKernelArg(ArgIndex, 0, nullptr); + } else { + CUdeviceptr CuPtr = std::get(ArgValue->Mem).get(); + Kernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr); + } + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Update value arguments to the kernel + uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = + pUpdateKernelLaunch->pNewValueArgList; + for (uint32_t i = 0; i < NumValueArgs; i++) { + const auto &ValueArgDesc = ArgValueList[i]; + uint32_t ArgIndex = ValueArgDesc.argIndex; + size_t ArgSize = ValueArgDesc.argSize; + const void *ArgValue = ValueArgDesc.pNewValueArg; + + ur_result_t Result = UR_RESULT_SUCCESS; + + try { + Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Set the updated ND range + const uint32_t NewWorkDim = pUpdateKernelLaunch->newWorkDim; + if (NewWorkDim != 0) { + UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + hCommand->WorkDim = NewWorkDim; + } + + if (pUpdateKernelLaunch->pNewGlobalWorkOffset) { + hCommand->setGlobalOffset(pUpdateKernelLaunch->pNewGlobalWorkOffset); + } + + if 
(pUpdateKernelLaunch->pNewGlobalWorkSize) { + hCommand->setGlobalSize(pUpdateKernelLaunch->pNewGlobalWorkSize); + } + + if (pUpdateKernelLaunch->pNewLocalWorkSize) { + hCommand->setLocalSize(pUpdateKernelLaunch->pNewLocalWorkSize); + } + + size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; + size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + + const bool ProvidedLocalSize = hCommand->LocalWorkSize[0] != 0 || + hCommand->LocalWorkSize[1] != 0 || + hCommand->LocalWorkSize[2] != 0; + // If no worksize is provided make sure we pass nullptr to setKernelParams so + // it can guess the local work size. + size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; + uint32_t WorkDim = hCommand->WorkDim; + + // Set the number of threads per block to the number of threads per warp + // by default unless user has provided a better number + size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; + size_t BlocksPerGrid[3] = {1u, 1u, 1u}; + CUfunction CuFunc = Kernel->get(); + ur_context_handle_t Context = CommandBuffer->Context; + ur_device_handle_t Device = CommandBuffer->Device; + auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset, + GlobalWorkSize, LocalWorkSize, Kernel, CuFunc, + ThreadsPerBlock, BlocksPerGrid); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + + CUDA_KERNEL_NODE_PARAMS &Params = hCommand->Params; + + Params.func = CuFunc; + Params.gridDimX = BlocksPerGrid[0]; + Params.gridDimY = BlocksPerGrid[1]; + Params.gridDimZ = BlocksPerGrid[2]; + Params.blockDimX = ThreadsPerBlock[0]; + Params.blockDimY = ThreadsPerBlock[1]; + Params.blockDimZ = ThreadsPerBlock[2]; + Params.sharedMemBytes = Kernel->getLocalSize(); + Params.kernelParams = const_cast(Kernel->getArgIndices().data()); + + CUgraphNode Node = *(hCommand->Node); + CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec; + UR_CHECK_ERROR(cuGraphExecKernelNodeSetParams(CudaGraphExec, Node, &Params)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t 
UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + return ReturnValue(hCommandBuffer->getExternalReferenceCount()); + default: + assert(!"Command-buffer info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: + return ReturnValue(hCommand->getExternalReferenceCount()); + default: + assert(!"Command-buffer command info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 18264410c4..e2b09059bf 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -175,20 +175,91 @@ static inline const char *getUrResultString(ur_result_t Result) { fprintf(stderr, "UR <--- %s(%s)\n", #Call, getUrResultString(Result)); \ } +// Handle to a kernel command. +// +// Struct that stores all the information related to a kernel command in a +// command-buffer, such that the command can be recreated. When handles can +// be returned from other command types this struct will need refactored. 
+struct ur_exp_command_buffer_command_handle_t_ { + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, + std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, + const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr); + + void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); + } + } + + void setGlobalSize(const size_t *GlobalWorkSizePtr) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); + } + } + + void setLocalSize(const size_t *LocalWorkSizePtr) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(LocalWorkSize, LocalWorkSizePtr, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(LocalWorkSize + WorkDim, 0, ZeroSize); + } + } + + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } + + ur_exp_command_buffer_handle_t CommandBuffer; + ur_kernel_handle_t Kernel; + std::shared_ptr Node; + CUDA_KERNEL_NODE_PARAMS Params; + + uint32_t WorkDim; + size_t GlobalWorkOffset[3]; + size_t GlobalWorkSize[3]; + size_t LocalWorkSize[3]; + +private: + 
std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; +}; + struct ur_exp_command_buffer_handle_t_ { - ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext, - ur_device_handle_t hDevice); + ur_exp_command_buffer_handle_t_(ur_context_handle_t Context, + ur_device_handle_t Device, bool IsUpdatable); ~ur_exp_command_buffer_handle_t_(); - void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, + void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, std::shared_ptr CuNode) { SyncPoints[SyncPoint] = std::move(CuNode); NextSyncPoint++; } - ur_exp_command_buffer_sync_point_t GetNextSyncPoint() const { + ur_exp_command_buffer_sync_point_t getNextSyncPoint() const { return NextSyncPoint; } @@ -196,23 +267,46 @@ struct ur_exp_command_buffer_handle_t_ { // @param CuNode Node to register as next sync point // @return Pointer to the sync that registers the Node ur_exp_command_buffer_sync_point_t - AddSyncPoint(std::shared_ptr CuNode) { + addSyncPoint(std::shared_ptr CuNode) { ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint; - RegisterSyncPoint(SyncPoint, std::move(CuNode)); + registerSyncPoint(SyncPoint, std::move(CuNode)); return SyncPoint; } + uint32_t incrementInternalReferenceCount() noexcept { + return ++RefCountInternal; + } + uint32_t decrementInternalReferenceCount() noexcept { + return --RefCountInternal; + } + uint32_t getInternalReferenceCount() const noexcept { + return RefCountInternal; + } + + uint32_t incrementExternalReferenceCount() noexcept { + return ++RefCountExternal; + } + uint32_t decrementExternalReferenceCount() noexcept { + return --RefCountExternal; + } + uint32_t getExternalReferenceCount() const noexcept { + return RefCountExternal; + } + // UR context associated with this command-buffer ur_context_handle_t Context; // Device associated with this command buffer ur_device_handle_t Device; + // Whether commands in the command-buffer can be updated + bool IsUpdatable; // Cuda 
Graph handle CUgraph CudaGraph; // Cuda Graph Exec handle CUgraphExec CudaGraphExec; // Atomic variable counting the number of reference to this command_buffer // using std::atomic prevents data race when incrementing/decrementing. - std::atomic_uint32_t RefCount; + std::atomic_uint32_t RefCountInternal; + std::atomic_uint32_t RefCountExternal; // Map of sync_points to ur_events std::unordered_map CommandHandles; }; diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 49feced282..b33ad6c792 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1035,6 +1035,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(true); + default: break; } diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index f31ffe6d87..670d6c02e9 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -294,6 +294,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 54a6fa2f4e..0d239bc432 100644 --- 
a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -46,7 +46,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -162,3 +163,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t, ur_exp_command_buffer_info_t, size_t, + void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t, + ur_exp_command_buffer_command_info_t, size_t, void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 08d4e87ae4..bc67fcee71 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -841,6 +841,10 @@ UR_APIEXPORT 
ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ASYNC_BARRIER: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: break; } diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 7707e78425..cc7a5e1e9f 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -291,6 +291,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 56ffbe0145..7dc2a42fd6 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -511,7 +511,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *GlobalWorkSize, const size_t *LocalWorkSize, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + ur_exp_command_buffer_sync_point_t *SyncPoint, + ur_exp_command_buffer_command_handle_t *) { // Lock automatically releases when this goes out of scope. 
std::scoped_lock Lock( Kernel->Mutex, Kernel->Program->Mutex); @@ -980,3 +981,41 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + return ReturnValue(uint32_t{hCommandBuffer->RefCount.load()}); + default: + assert(!"Command-buffer info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t, + ur_exp_command_buffer_command_info_t, size_t, void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index af80f1905f..918b04400a 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -915,7 +915,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ze2urResult(errc); return ReturnValue(UrRootDev); } - + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + return ReturnValue(true); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + 
return ReturnValue(false); default: urPrint("Unsupported ParamName in urGetDeviceInfo\n"); urPrint("ParamName=%d(0x%x)\n", ParamName, ParamName); diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 74d0706b31..6fdf197904 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -341,6 +341,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index 50b38c9d52..fde6c03b86 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -50,7 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -162,3 +163,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_sync_point_t *) { return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseCommandExp(ur_exp_command_buffer_command_handle_t) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t, ur_exp_command_buffer_info_t, size_t, + void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t, + ur_exp_command_buffer_command_info_t, size_t, void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 68dafdfc1c..dfabfb81e5 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -308,6 +308,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, CASE_UR_UNSUPPORTED(UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH); case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: return ReturnValue(false); + + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: DIE_NO_IMPLEMENTATION; } diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 9408101927..0c48ee1fb3 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -283,6 +283,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL 
urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index 439246658a..3ca48743ac 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -4674,12 +4674,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4703,7 +4703,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4724,7 +4724,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4745,7 +4745,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4766,9 +4766,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. 
const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -4779,8 +4779,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4791,9 +4793,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, phCommand); } else { // generic implementation + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + d_context.get()); + } } return result; @@ -4805,16 +4812,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4873,7 +4880,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -4883,8 +4890,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4908,18 +4915,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. 
- ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4943,17 +4950,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. 
+ void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4977,7 +4984,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -4994,8 +5001,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5020,31 +5027,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5069,29 +5076,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -5218,15 +5225,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
@@ -5250,6 +5256,137 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferRetainCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnRetainCommandExp = + d_context.urDdiTable.CommandBufferExp.pfnRetainCommandExp; + if (nullptr != pfnRetainCommandExp) { + result = pfnRetainCommandExp(hCommand); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferReleaseCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnReleaseCommandExp = + d_context.urDdiTable.CommandBufferExp.pfnReleaseCommandExp; + if (nullptr != pfnReleaseCommandExp) { + result = pfnReleaseCommandExp(hCommand); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnUpdateKernelLaunchExp = + d_context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr != pfnUpdateKernelLaunchExp) { + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnGetInfoExp = d_context.urDdiTable.CommandBufferExp.pfnGetInfoExp; + if (nullptr != 
pfnGetInfoExp) { + result = pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, + pPropSizeRet); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferCommandGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnCommandGetInfoExp = + d_context.urDdiTable.CommandBufferExp.pfnCommandGetInfoExp; + if (nullptr != pfnCommandGetInfoExp) { + result = pfnCommandGetInfoExp(hCommand, propName, propSize, pPropValue, + pPropSizeRet); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -5714,6 +5851,17 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; + pDdiTable->pfnRetainCommandExp = driver::urCommandBufferRetainCommandExp; + + pDdiTable->pfnReleaseCommandExp = driver::urCommandBufferReleaseCommandExp; + + pDdiTable->pfnUpdateKernelLaunchExp = + driver::urCommandBufferUpdateKernelLaunchExp; + + pDdiTable->pfnGetInfoExp = driver::urCommandBufferGetInfoExp; + + pDdiTable->pfnCommandGetInfoExp = driver::urCommandBufferCommandGetInfoExp; + return result; } catch (...) { return exceptionToResult(std::current_exception()); diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 74cdd8a03d..88c661b4ae 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -104,7 +104,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *) { cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -356,3 +357,67 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +
+UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, + [[maybe_unused]] const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { + + cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext); + cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext<decltype(clGetCommandBufferInfoKHR)>( + CLContext, cl_ext::ExtFuncPtrCache->clGetCommandBufferInfoKHRCache, + cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR); + + if (!clGetCommandBufferInfoKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + if (propName != UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pPropSizeRet) { + *pPropSizeRet = sizeof(cl_uint); + } + + cl_uint ref_count; + CL_RETURN_ON_FAILURE(clGetCommandBufferInfoKHR( + hCommandBuffer->CLCommandBuffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR, + sizeof(ref_count), &ref_count, nullptr)); + + if (pPropValue) { + if (propSize != sizeof(cl_uint)) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + static_assert(sizeof(cl_uint) == sizeof(uint32_t)); + *static_cast<uint32_t *>(pPropValue) = static_cast<uint32_t>(ref_count); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, + [[maybe_unused]] ur_exp_command_buffer_command_info_t propName, + [[maybe_unused]] size_t propSize,
[[maybe_unused]] void *pPropValue, + [[maybe_unused]] size_t *pPropSizeRet) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0cb19694a6..0667cd3d17 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -214,6 +214,7 @@ CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR"; CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; +CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; #undef CONSTFIX @@ -300,6 +301,10 @@ cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); +using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; @@ -338,6 +343,7 @@ struct ExtFuncPtrCacheT { FuncPtrCache clCommandCopyBufferRectKHRCache; FuncPtrCache clCommandFillBufferKHRCache; FuncPtrCache clEnqueueCommandBufferKHRCache; + FuncPtrCache clGetCommandBufferInfoKHRCache; }; // A raw pointer is used here since the lifetime of this map has to be tied to // piTeardown to avoid issues with static destruction order (a user application diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 5b0d5332db..115b9b2e09 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -951,6 +951,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ASYNC_BARRIER: { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } + + case 
UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + cl_device_id Dev = cl_adapter::cast<cl_device_id>(hDevice); + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + std::string SupportedExtensions(ExtStr.c_str()); + return ReturnValue(ExtStr.find("cl_khr_command_buffer") != + std::string::npos); + } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + return ReturnValue(false); + } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index ac2c33475b..eb64df5c6f 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -301,6 +301,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; + pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; + pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; return retVal; } diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 5867d295ae..ca0c67b217 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5050,12 +5050,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for
urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -5082,7 +5082,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -5107,7 +5107,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -5132,7 +5132,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -5158,9 +5158,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -5171,8 +5171,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -5190,15 +5192,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pLocalWorkSize, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &pSyncPoint, + &phCommand}; uint64_t instance = context.notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", &params); ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", &params, &result, @@ -5211,16 +5214,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command.
) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -5294,7 +5297,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -5304,8 +5307,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -5343,18 +5346,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. 
+ ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -5391,17 +5394,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -5438,7 +5441,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -5455,8 +5458,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -5500,31 +5503,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -5568,29 +5571,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. 
size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -5774,15 +5777,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -5809,6 +5811,165 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferRetainCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + auto pfnRetainCommandExp = + context.urDdiTable.CommandBufferExp.pfnRetainCommandExp; + + if (nullptr == pfnRetainCommandExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_retain_command_exp_params_t params = {&hCommand}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP, + "urCommandBufferRetainCommandExp", &params); + + ur_result_t result = pfnRetainCommandExp(hCommand); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_RETAIN_COMMAND_EXP, + "urCommandBufferRetainCommandExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferReleaseCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. 
+) { + auto pfnReleaseCommandExp = + context.urDdiTable.CommandBufferExp.pfnReleaseCommandExp; + + if (nullptr == pfnReleaseCommandExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_release_command_exp_params_t params = {&hCommand}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP, + "urCommandBufferReleaseCommandExp", &params); + + ur_result_t result = pfnReleaseCommandExp(hCommand); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_RELEASE_COMMAND_EXP, + "urCommandBufferReleaseCommandExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. 
+) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_update_kernel_launch_exp_params_t params = { + &hCommand, &pUpdateKernelLaunch}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", &params); + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", &params, &result, + instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +) { + auto pfnGetInfoExp = context.urDdiTable.CommandBufferExp.pfnGetInfoExp; + + if (nullptr == pfnGetInfoExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_get_info_exp_params_t params = { + &hCommandBuffer, &propName, &propSize, &pPropValue, &pPropSizeRet}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP, + "urCommandBufferGetInfoExp", &params); + + ur_result_t result = pfnGetInfoExp(hCommandBuffer, propName, propSize, + pPropValue, pPropSizeRet); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP, + 
"urCommandBufferGetInfoExp", &params, &result, instance); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferCommandGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +) { + auto pfnCommandGetInfoExp = + context.urDdiTable.CommandBufferExp.pfnCommandGetInfoExp; + + if (nullptr == pfnCommandGetInfoExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_command_get_info_exp_params_t params = { + &hCommand, &propName, &propSize, &pPropValue, &pPropSizeRet}; + uint64_t instance = + context.notify_begin(UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP, + "urCommandBufferCommandGetInfoExp", &params); + + ur_result_t result = pfnCommandGetInfoExp(hCommand, propName, propSize, + pPropValue, pPropSizeRet); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP, + "urCommandBufferCommandGetInfoExp", &params, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -6389,6 +6550,25 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; + 
dditable.pfnRetainCommandExp = pDdiTable->pfnRetainCommandExp; + pDdiTable->pfnRetainCommandExp = + ur_tracing_layer::urCommandBufferRetainCommandExp; + + dditable.pfnReleaseCommandExp = pDdiTable->pfnReleaseCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur_tracing_layer::urCommandBufferReleaseCommandExp; + + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_tracing_layer::urCommandBufferUpdateKernelLaunchExp; + + dditable.pfnGetInfoExp = pDdiTable->pfnGetInfoExp; + pDdiTable->pfnGetInfoExp = ur_tracing_layer::urCommandBufferGetInfoExp; + + dditable.pfnCommandGetInfoExp = pDdiTable->pfnCommandGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur_tracing_layer::urCommandBufferCommandGetInfoExp; + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index e4212212b4..5bac5253e0 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -7629,12 +7629,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. 
ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -7676,7 +7676,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -7699,7 +7699,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -7722,7 +7722,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -7745,9 +7745,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -7758,8 +7758,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -7805,8 +7807,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); return result; } @@ -7815,16 +7817,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7939,7 +7941,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. 
+ hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7949,8 +7951,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -8002,18 +8004,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -8060,17 +8062,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -8117,7 +8119,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -8134,8 +8136,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -8188,31 +8190,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -8260,29 +8262,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -8499,15 +8501,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. 
@@ -8555,6 +8556,184 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferRetainCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + auto pfnRetainCommandExp = + context.urDdiTable.CommandBufferExp.pfnRetainCommandExp; + + if (nullptr == pfnRetainCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + } + + ur_result_t result = pfnRetainCommandExp(hCommand); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferReleaseCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + auto pfnReleaseCommandExp = + context.urDdiTable.CommandBufferExp.pfnReleaseCommandExp; + + if (nullptr == pfnReleaseCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + } + + ur_result_t result = pfnReleaseCommandExp(hCommand); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. 
+ const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pUpdateKernelLaunch) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +) { + auto pfnGetInfoExp = context.urDdiTable.CommandBufferExp.pfnGetInfoExp; + + if (nullptr == pfnGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommandBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (propSize != 0 && pPropValue == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (pPropValue == NULL && pPropSizeRet == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT < propName) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (propSize == 0 && pPropValue 
!= NULL) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnGetInfoExp(hCommandBuffer, propName, propSize, + pPropValue, pPropSizeRet); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferCommandGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +) { + auto pfnCommandGetInfoExp = + context.urDdiTable.CommandBufferExp.pfnCommandGetInfoExp; + + if (nullptr == pfnCommandGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (propSize != 0 && pPropValue == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (pPropValue == NULL && pPropSizeRet == NULL) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT < propName) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (propSize == 0 && pPropValue != NULL) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + } + + ur_result_t result = pfnCommandGetInfoExp(hCommand, propName, propSize, + pPropValue, pPropSizeRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL 
urEnqueueCooperativeKernelLaunchExp( @@ -9263,6 +9442,25 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; + dditable.pfnRetainCommandExp = pDdiTable->pfnRetainCommandExp; + pDdiTable->pfnRetainCommandExp = + ur_validation_layer::urCommandBufferRetainCommandExp; + + dditable.pfnReleaseCommandExp = pDdiTable->pfnReleaseCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur_validation_layer::urCommandBufferReleaseCommandExp; + + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_validation_layer::urCommandBufferUpdateKernelLaunchExp; + + dditable.pfnGetInfoExp = pDdiTable->pfnGetInfoExp; + pDdiTable->pfnGetInfoExp = ur_validation_layer::urCommandBufferGetInfoExp; + + dditable.pfnCommandGetInfoExp = pDdiTable->pfnCommandGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur_validation_layer::urCommandBufferCommandGetInfoExp; + return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 43326ff5d0..822448cae8 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -32,6 +32,7 @@ ur_exp_image_mem_factory_t ur_exp_image_mem_factory; ur_exp_interop_mem_factory_t ur_exp_interop_mem_factory; ur_exp_interop_semaphore_factory_t ur_exp_interop_semaphore_factory; ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; +ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet @@ -6526,12 +6527,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL 
urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6572,7 +6573,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6600,7 +6601,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6628,7 +6629,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6656,9 +6657,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6669,8 +6670,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6693,10 +6696,26 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel = reinterpret_cast(hKernel)->handle; // forward to device-platform - result = pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + result = pfnAppendKernelLaunchExp( + hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + ur_exp_command_buffer_command_factory.getInstance( + *phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -6705,16 +6724,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6787,7 +6806,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6797,8 +6816,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6835,18 +6854,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. 
+ hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6880,17 +6899,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6924,7 +6943,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6941,8 +6960,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6980,31 +6999,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. 
+ hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7039,29 +7058,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -7222,15 +7241,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7283,6 +7301,194 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferRetainCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnRetainCommandExp = + dditable->ur.CommandBufferExp.pfnRetainCommandExp; + if (nullptr == pfnRetainCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // forward to device-platform + result = pfnRetainCommandExp(hCommand); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferReleaseCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnReleaseCommandExp = + dditable->ur.CommandBufferExp.pfnReleaseCommandExp; + if (nullptr == pfnReleaseCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // forward to device-platform + result = pfnReleaseCommandExp(hCommand); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnUpdateKernelLaunchExp = + dditable->ur.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // Deal with any struct parameters that have handle members we need to convert. + auto pUpdateKernelLaunchLocal = *pUpdateKernelLaunch; + + std::vector + pUpdateKernelLaunchpNewMemObjArgList; + for (uint32_t i = 0; i < pUpdateKernelLaunch->numNewMemObjArgs; i++) { + ur_exp_command_buffer_update_memobj_arg_desc_t NewRangeStruct = + pUpdateKernelLaunchLocal.pNewMemObjArgList[i]; + if (NewRangeStruct.hNewMemObjArg) { + NewRangeStruct.hNewMemObjArg = reinterpret_cast( + NewRangeStruct.hNewMemObjArg) + ->handle; + } + + pUpdateKernelLaunchpNewMemObjArgList.push_back(NewRangeStruct); + } + pUpdateKernelLaunchLocal.pNewMemObjArgList = + pUpdateKernelLaunchpNewMemObjArgList.data(); + + // Now that we've converted all the members update the param pointers + pUpdateKernelLaunch = &pUpdateKernelLaunchLocal; + + // forward to device-platform + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + 
pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnGetInfoExp = dditable->ur.CommandBufferExp.pfnGetInfoExp; + if (nullptr == pfnGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, + pPropSizeRet); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferCommandGetInfoExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnCommandGetInfoExp = + dditable->ur.CommandBufferExp.pfnCommandGetInfoExp; + if (nullptr == pfnCommandGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // forward to device-platform + result = pfnCommandGetInfoExp(hCommand, propName, propSize, pPropValue, + pPropSizeRet); + + return result; +} + 
/////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -7898,6 +8104,15 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_loader::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = ur_loader::urCommandBufferEnqueueExp; + pDdiTable->pfnRetainCommandExp = + ur_loader::urCommandBufferRetainCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur_loader::urCommandBufferReleaseCommandExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_loader::urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = ur_loader::urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur_loader::urCommandBufferCommandGetInfoExp; } else { // return pointers directly to platform's DDIs *pDdiTable = ur_loader::context->platforms.front() diff --git a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp index 4edbabbd8b..d98b99a655 100644 --- a/source/loader/ur_ldrddi.hpp +++ b/source/loader/ur_ldrddi.hpp @@ -92,6 +92,12 @@ using ur_exp_command_buffer_factory_t = singleton_factory_t; +using ur_exp_command_buffer_command_object_t = + object_t; +using ur_exp_command_buffer_command_factory_t = + singleton_factory_t; + } // namespace ur_loader #endif /* UR_LOADER_LDRDDI_H */ diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index cd4a70c91e..1e9400aaa4 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7016,7 +7016,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7033,12 +7033,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) try { auto pfnCreateExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnCreateExp; @@ -7066,7 +7066,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnRetainExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnRetainExp; @@ -7095,7 +7095,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. 
) try { auto pfnReleaseExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -7124,7 +7124,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnFinalizeExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -7138,7 +7138,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7165,9 +7165,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -7178,8 +7178,10 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendKernelLaunchExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -7190,13 +7192,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7221,16 +7223,16 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendUSMMemcpyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7246,7 +7248,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7301,7 +7303,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7322,7 +7324,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7332,8 +7334,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferCopyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -7349,7 +7351,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7371,18 +7373,18 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -7398,7 +7400,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7420,17 +7422,17 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -7446,7 +7448,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7467,7 +7469,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -7484,8 +7486,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) try { auto pfnAppendMemBufferCopyRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7503,7 +7505,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7525,31 +7527,31 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. 
void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7567,7 +7569,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7589,29 +7591,29 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. 
size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7629,7 +7631,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7681,7 +7683,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. 
/// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -7737,7 +7739,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -7814,15 +7816,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7839,6 +7840,203 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Increment the command object's reference count. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. + ) try { + auto pfnRetainCommandExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnRetainCommandExp; + if (nullptr == pfnRetainCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnRetainCommandExp(hCommand); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Decrement the command object's reference count and delete the command +/// object if the reference count becomes zero. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. + ) try { + auto pfnReleaseCommandExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnReleaseCommandExp; + if (nullptr == pfnReleaseCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnReleaseCommandExp(hCommand); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command in a finalized command-buffer. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + auto pfnUpdateKernelLaunchExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer object information. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property + ) try { + auto pfnGetInfoExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnGetInfoExp; + if (nullptr == pfnGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, + pPropSizeRet); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer command object information. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property + ) try { + auto pfnCommandGetInfoExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnCommandGetInfoExp; + if (nullptr == pfnCommandGetInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnCommandGetInfoExp(hCommand, propName, propSize, pPropValue, + pPropSizeRet); +} 
catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 4503999c50..1d8b3ca9af 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -938,6 +938,22 @@ ur_result_t urPrintExpInteropSemaphoreDesc( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpCommandBufferInfo(enum ur_exp_command_buffer_info_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferCommandInfo( + enum ur_exp_command_buffer_command_info_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpCommandBufferDesc(const struct ur_exp_command_buffer_desc_t params, char *buffer, const size_t buff_size, @@ -947,6 +963,46 @@ urPrintExpCommandBufferDesc(const struct ur_exp_command_buffer_desc_t params, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpCommandBufferUpdateMemobjArgDesc( + const struct ur_exp_command_buffer_update_memobj_arg_desc_t params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferUpdatePointerArgDesc( + const struct ur_exp_command_buffer_update_pointer_arg_desc_t params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferUpdateValueArgDesc( + const struct ur_exp_command_buffer_update_value_arg_desc_t params, + char *buffer, 
const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferUpdateExecInfoDesc( + const struct ur_exp_command_buffer_update_exec_info_desc_t params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpCommandBufferUpdateKernelLaunchDesc( + const struct ur_exp_command_buffer_update_kernel_launch_desc_t params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, const size_t buff_size, size_t *out_size) { std::stringstream ss; @@ -1279,6 +1335,46 @@ ur_result_t urPrintCommandBufferEnqueueExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintCommandBufferRetainCommandExpParams( + const struct ur_command_buffer_retain_command_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferReleaseCommandExpParams( + const struct ur_command_buffer_release_command_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferUpdateKernelLaunchExpParams( + const struct ur_command_buffer_update_kernel_launch_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferGetInfoExpParams( + const struct ur_command_buffer_get_info_exp_params_t *params, char *buffer, + const size_t 
buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintCommandBufferCommandGetInfoExpParams( + const struct ur_command_buffer_command_get_info_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintContextCreateParams(const struct ur_context_create_params_t *params, char *buffer, const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 26f24aba08..5ee68ce529 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -5952,7 +5952,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -5969,12 +5969,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -5995,7 +5995,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6017,7 +6017,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6039,14 +6039,14 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6073,9 +6073,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. 
+ uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6086,15 +6086,17 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6119,23 +6121,23 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6181,7 +6183,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6202,7 +6204,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6212,15 +6214,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6242,25 +6244,25 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6282,24 +6284,24 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being read. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object.
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6320,7 +6322,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6337,15 +6339,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6367,38 +6369,38 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. 
ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. 
/// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6420,36 +6422,36 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
- ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6492,7 +6494,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -6539,7 +6541,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -6607,15 +6609,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. 
const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -6624,6 +6625,166 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Increment the command object's reference count. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Decrement the command object's reference count and delete the command +/// object if the reference count becomes zero. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t + hCommand ///< [in] Handle of the command-buffer command. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command in a finalized command-buffer. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// + If the command-buffer `hCommand` belongs to has not been finalized. 
+/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer object information. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info. 
+/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferGetInfoExp( + ur_exp_command_buffer_handle_t + hCommandBuffer, ///< [in] handle of the command-buffer object + ur_exp_command_buffer_info_t + propName, ///< [in] the name of the command-buffer property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer property +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get command-buffer command object information. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT < propName` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION +/// + If `propName` is not supported by the adapter. +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `propSize == 0 && pPropValue != NULL` +/// + If `propSize` is less than the real number of bytes needed to return the info.
+/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `propSize != 0 && pPropValue == NULL` +/// + `pPropValue == NULL && pPropSizeRet == NULL` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] handle of the command-buffer command object + ur_exp_command_buffer_command_info_t + propName, ///< [in] the name of the command-buffer command property to query + size_t + propSize, ///< [in] size in bytes of the command-buffer command property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the + ///< command-buffer command property + size_t * + pPropSizeRet ///< [out][optional] bytes returned in command-buffer command property +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index 44f9c43efb..ac48f3a313 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -118,6 +118,7 @@ if(UR_DPCXX) add_subdirectory(kernel) add_subdirectory(program) add_subdirectory(enqueue) + add_subdirectory(exp_command_buffer) else() message(WARNING "UR_DPCXX is not defined, the following conformance test executables \ diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 202109c3b5..450733d5ed 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -94,6 +94,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp) 
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/usm_ll.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp) set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h) add_custom_command(OUTPUT ${KERNEL_HEADER} diff --git a/test/conformance/device_code/indexers_usm.cpp b/test/conformance/device_code/indexers_usm.cpp new file mode 100644 index 0000000000..76b0751730 --- /dev/null +++ b/test/conformance/device_code/indexers_usm.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + const cl::sycl::range<3> global_range(8, 8, 8); + const cl::sycl::range<3> local_range(2, 2, 2); + const cl::sycl::id<3> global_offset(4, 4, 4); + const cl::sycl::nd_range<3> nd_range(global_range, local_range, + global_offset); + + cl::sycl::queue sycl_queue; + const size_t elements_per_work_item = 6; + int *ptr = cl::sycl::malloc_shared(global_range[0] * global_range[1] * + global_range[2] * + elements_per_work_item, + sycl_queue); + + sycl_queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for( + nd_range, [ptr](cl::sycl::nd_item<3> index) { + int *wi_ptr = + ptr + index.get_global_linear_id() * elements_per_work_item; + + wi_ptr[0] = index.get_global_id(0); + wi_ptr[1] = index.get_global_id(1); + wi_ptr[2] = index.get_global_id(2); + + wi_ptr[3] = index.get_local_id(0); + wi_ptr[4] = index.get_local_id(1); + wi_ptr[5] = index.get_local_id(2); + }); + }); + return 0; +} diff --git a/test/conformance/device_code/saxpy.cpp b/test/conformance/device_code/saxpy.cpp new file mode 100644 index 0000000000..593e8e2435 --- /dev/null +++ b/test/conformance/device_code/saxpy.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel 
Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + std::vector X(array_size, 1); + std::vector Y(array_size, 2); + std::vector Z(array_size, 0); + uint32_t A = 42; + auto x_buff = + cl::sycl::buffer(X.data(), cl::sycl::range<1>(array_size)); + auto y_buff = + cl::sycl::buffer(Y.data(), cl::sycl::range<1>(array_size)); + auto z_buff = + cl::sycl::buffer(Z.data(), cl::sycl::range<1>(array_size)); + + cl::sycl::queue sycl_queue; + sycl_queue.submit([&](cl::sycl::handler &cgh) { + auto x_acc = x_buff.get_access(cgh); + auto y_acc = y_buff.get_access(cgh); + auto z_acc = z_buff.get_access(cgh); + cgh.parallel_for(cl::sycl::range<1>{array_size}, + [=](cl::sycl::item<1> itemId) { + auto i = itemId.get_id(0); + z_acc[i] = A * x_acc[i] + y_acc[i]; + }); + }); + return 0; +} diff --git a/test/conformance/device_code/saxpy_usm.cpp b/test/conformance/device_code/saxpy_usm.cpp new file mode 100644 index 0000000000..8772a7e25d --- /dev/null +++ b/test/conformance/device_code/saxpy_usm.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + + cl::sycl::queue sycl_queue; + uint32_t *X = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Y = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Z = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t A = 42; + + sycl_queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for(cl::sycl::range<1>{array_size}, + [=](cl::sycl::item<1> itemId) { + auto i = itemId.get_id(0); + Z[i] = A * X[i] + Y[i]; + }); + }); + return 0; +} diff --git a/test/conformance/exp_command_buffer/CMakeLists.txt b/test/conformance/exp_command_buffer/CMakeLists.txt new file mode 100644 index 0000000000..a8ecf793ab --- /dev/null +++ b/test/conformance/exp_command_buffer/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +add_conformance_test_with_kernels_environment(exp_command_buffer + buffer_fill_kernel_update.cpp + usm_fill_kernel_update.cpp + buffer_saxpy_kernel_update.cpp + usm_saxpy_kernel_update.cpp + ndrange_update.cpp + release.cpp + retain.cpp + invalid_update.cpp +) diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp new file mode 100644 index 0000000000..ea5295dc6b --- /dev/null +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -0,0 +1,404 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" + +// Test that updating a command-buffer with a single kernel command +// taking buffer arguments works correctly.
+struct BufferFillCommandTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = "fill"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + // First argument is buffer to fill (will also be hidden accessor arg) + AddBuffer1DArg(sizeof(val) * global_size, &buffer); + // Second argument is scalar to fill with. + AddPodArg(val); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (new_buffer) { + EXPECT_SUCCESS(urMemRelease(new_buffer)); + } + + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t buffer_size = sizeof(val) * global_size; + ur_mem_handle_t buffer = nullptr; + ur_mem_handle_t new_buffer = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(BufferFillCommandTest); + +// Update kernel arguments to fill with a new scalar value to a new output +// buffer. 
+TEST_P(BufferFillCommandTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, buffer_size, val); + + // Create a new buffer to update kernel output parameter to + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Set argument index zero as new buffer + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + // Set argument index 2 as new value to fill (index 1 is buffer accessor) + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + &new_output_desc, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); +
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + ValidateBuffer(new_buffer, buffer_size, new_val); +} + +// Test updating the global size so that the fill outputs to a larger buffer +TEST_P(BufferFillCommandTest, UpdateGlobalSize) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + size_t new_global_size = 64; + const size_t new_buffer_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + new_buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, new_buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + &new_output_desc, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(new_buffer, 
new_buffer_size, val); +} + +// Test updating the input & output kernel arguments and global +// size, by calling update individually for each of these configurations. +TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + size_t new_global_size = 64; + const size_t new_buffer_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + new_buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, new_buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t output_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + &new_output_desc, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &output_update_desc)); + + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + 
ur_exp_command_buffer_update_kernel_launch_desc_t input_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &input_update_desc)); + + ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + command_handle, &global_size_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(new_buffer, new_buffer_size, new_val); +} + +// Test calling update twice on the same command-handle updating the +// input value, and verifying that it's the second call which persists. 
+TEST_P(BufferFillCommandTest, OverrideUpdate) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + uint32_t first_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t first_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(first_val), // argSize + nullptr, // pProperties + &first_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t first_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &first_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &first_update_desc)); + + uint32_t second_val = -99; + ur_exp_command_buffer_update_value_arg_desc_t second_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(second_val), // argSize + nullptr, // pProperties + &second_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &second_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + 
}; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &second_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(buffer, sizeof(val) * global_size, second_val); +} + +// Test calling update with multiple ur_exp_command_buffer_update_value_arg_desc_t +// instances updating the same argument, and checking that the last one in the +// list persists. +TEST_P(BufferFillCommandTest, OverrideArgList) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + ur_exp_command_buffer_update_value_arg_desc_t input_descs[2]; + uint32_t first_val = 33; + input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(first_val), // argSize + nullptr, // pProperties + &first_val, // hArgValue + }; + + uint32_t second_val = -99; + input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(second_val), // argSize + nullptr, // pProperties + &second_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 2, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + input_descs, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &second_update_desc)); + + 
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(buffer, sizeof(val) * global_size, second_val); +} diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp new file mode 100644 index 0000000000..879b3a9bc6 --- /dev/null +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -0,0 +1,178 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" + +// Test that updating a command-buffer with a single kernel command +// taking buffer & scalar arguments works correctly. +struct BufferSaxpyKernelTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = "saxpy"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + const size_t allocation_size = sizeof(uint32_t) * global_size; + for (auto &buffer : buffers) { + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + allocation_size, nullptr, + &buffer)); + ASSERT_NE(buffer, nullptr); + + std::vector init(allocation_size); + uur::generateMemFillPattern(init); + + ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, true, 0, + allocation_size, init.data(), + 0, nullptr, nullptr)); + } + + // Index 0 is output buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0])); + // Index 1 is output accessor + struct { + size_t offsets[1] = {0}; + } accessor; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr, + &accessor)); + + // Index 2 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 2, sizeof(A), nullptr, &A)); + // Index 3 is X buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 
3, nullptr, buffers[1])); + + // Index 4 is X buffer accessor + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 4, sizeof(accessor), nullptr, + &accessor)); + // Index 5 is Y buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2])); + + // Index 6 is Y buffer accessor + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 6, sizeof(accessor), nullptr, + &accessor)); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + // Reads back output, X and Y and asserts that each of the `length` output elements equals the saxpy result computed from X, Y and A. + void Validate(ur_mem_handle_t output, ur_mem_handle_t X, ur_mem_handle_t Y, + uint32_t A, size_t length) { + // urEnqueueMemBufferRead takes its size in bytes while `length` counts elements, so scale by the element size to check every element. + std::vector output_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, output, true, 0, length * sizeof(output_data[0]), + output_data.data(), 0, nullptr, + nullptr)); + + std::vector X_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, X, true, 0, length * sizeof(X_data[0]), X_data.data(), 0, nullptr, nullptr)); + + std::vector Y_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, Y, true, 0, length * sizeof(Y_data[0]), Y_data.data(), 0, nullptr, nullptr)); + + for (size_t i = 0; i < length; i++) { + uint32_t result = A * X_data[i] + Y_data[i]; + ASSERT_EQ(result, output_data[i]); + } + } + + void TearDown() override { + for (auto &buffer : buffers) { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + } + + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + std::array buffers =
{nullptr, nullptr, nullptr, + nullptr}; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(BufferSaxpyKernelTest); + +TEST_P(BufferSaxpyKernelTest, UpdateParameters) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(buffers[0], buffers[1], buffers[2], A, global_size); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_input_descs[2]; + // New X at index 3 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + buffers[3], // hArgValue + }; + + // New Y at index 5 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 5, // argIndex + nullptr, // pProperties + buffers[4], // hArgValue + }; + + // A at index 2 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext, + 2, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + new_input_descs, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_A_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); +
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate(buffers[0], buffers[3], buffers[4], new_A, global_size); +} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match new file mode 100644 index 0000000000..43b50bde41 --- /dev/null +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -0,0 +1,18 @@ +{{OPT}}{{Segmentation fault|Aborted}} +{{OPT}}BufferFillCommandTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.UpdateGlobalSize/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.OverrideUpdate/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.OverrideArgList/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMFillCommandTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMFillCommandTest.UpdateExecInfo/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferSaxpyKernelTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMSaxpyKernelTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update3D/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update2D/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update1D/AMD_HIP_BACKEND{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.Success/AMD_HIP_BACKEND{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/AMD_HIP_BACKEND{{.*}} +{{OPT}}urCommandBufferRetainCommandExpTest.Success/AMD_HIP_BACKEND{{.*}} 
+{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/AMD_HIP_BACKEND{{.*}} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match new file mode 100644 index 0000000000..b4991347ba --- /dev/null +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -0,0 +1 @@ +{{OPT}}{{Segmentation fault|Aborted}} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h new file mode 100644 index 0000000000..4e9bff35f9 --- /dev/null +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -0,0 +1,175 @@ +// Copyright (C) 2022-2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef UR_CONFORMANCE_COMMAND_BUFFER_FIXTURES_H_INCLUDED +#define UR_CONFORMANCE_COMMAND_BUFFER_FIXTURES_H_INCLUDED + +#include + +namespace uur { +namespace command_buffer { + +struct urCommandBufferExpTest : uur::urContextTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp()); + + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); + + std::unique_ptr returned_extensions(new char[returned_size]); + + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); + + std::string_view extensions_string(returned_extensions.get()); + bool command_buffer_support = + extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != + std::string::npos; + + if (!command_buffer_support) { + GTEST_SKIP() << "EXP command-buffer feature is not supported."; + } + + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, + sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + + // Create a command-buffer + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::TearDown()); + } + + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + ur_bool_t updatable_command_buffer_support = false; +}; + +struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); + + std::unique_ptr returned_extensions(new 
char[returned_size]); + + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); + + std::string_view extensions_string(returned_extensions.get()); + bool command_buffer_support = + extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != + std::string::npos; + + if (!command_buffer_support) { + GTEST_SKIP() << "EXP command-buffer feature is not supported."; + } + + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, + sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + + // Create a command-buffer + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::TearDown()); + } + + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + ur_bool_t updatable_command_buffer_support = false; +}; + +struct urUpdatableCommandBufferExpExecutionTest + : urCommandBufferExpExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest ::SetUp()); + + if (!updatable_command_buffer_support) { + GTEST_SKIP() << "Updating EXP command-buffers is not supported."; + } + + // Create a command-buffer with update enabled. 
+ ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + &updatable_cmd_buf_handle)); + ASSERT_NE(updatable_cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (updatable_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE( + urCommandBufferExpExecutionTest::TearDown()); + } + + ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; +}; + +struct urCommandBufferCommandExpTest + : urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + // Append 2 kernel commands to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle_2)); + ASSERT_NE(command_handle_2, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + if (command_handle_2) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle_2)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + + ur_exp_command_buffer_command_handle_t command_handle = nullptr; + ur_exp_command_buffer_command_handle_t command_handle_2 = nullptr; 
+}; +} // namespace command_buffer +} // namespace uur + +#endif // UR_CONFORMANCE_COMMAND_BUFFER_FIXTURES_H_INCLUDED diff --git a/test/conformance/exp_command_buffer/invalid_update.cpp b/test/conformance/exp_command_buffer/invalid_update.cpp new file mode 100644 index 0000000000..00cf04ea85 --- /dev/null +++ b/test/conformance/exp_command_buffer/invalid_update.cpp @@ -0,0 +1,161 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.h" +#include + +// Negative tests that correct error codes are thrown on invalid update usage. +struct InvalidUpdateTest + : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { + void SetUp() override { + program_name = "fill_usm"; + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Allocate USM pointer to fill + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + // Index 0 is output + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr)); + // Index 1 is input scalar + ASSERT_SUCCESS( + urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val)); + + // Append kernel command to command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + } + + void TearDown() override { + if (shared_ptr) { +
EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + if (command_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urUpdatableCommandBufferExpExecutionTest::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(val) * global_size; + void *shared_ptr = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(InvalidUpdateTest); + +// Test error code is returned if command-buffer not finalized +TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { + // Set new value to use for fill at kernel index 1 + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + 0, // numNewExecInfos + 0, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_input_desc, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update command to command-buffer that has not been finalized + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +} + +// Test error code is returned if command-buffer not created with isUpdatable 
+TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) {
+    // Create a command-buffer without isUpdatable
+    ur_exp_command_buffer_handle_t test_cmd_buf_handle = nullptr;
+    ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr,
+                                            &test_cmd_buf_handle));
+    EXPECT_NE(test_cmd_buf_handle, nullptr);
+
+    // Append a kernel command to command-buffer and close command-buffer
+    ur_exp_command_buffer_command_handle_t test_command_handle = nullptr;
+    EXPECT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+        test_cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size,
+        &local_size, 0, nullptr, nullptr, &test_command_handle));
+    EXPECT_NE(test_command_handle, nullptr);
+
+    EXPECT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle));
+
+    // Set new value to use for fill at kernel index 1
+    uint32_t new_val = 33;
+    ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype
+        nullptr,         // pNext
+        1,               // argIndex
+        sizeof(new_val), // argSize
+        nullptr,         // pProperties
+        &new_val,        // hArgValue
+    };
+
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,         // pNext
+        0,               // numNewMemObjArgs
+        0,               // numNewPointerArgs
+        1,               // numNewValueArgs
+        0,               // numNewExecInfos
+        0,               // newWorkDim
+        nullptr,         // pNewMemObjArgList
+        nullptr,         // pNewPointerArgList
+        &new_input_desc, // pNewValueArgList
+        nullptr,         // pNewExecInfoList
+        nullptr,         // pNewGlobalWorkOffset
+        nullptr,         // pNewGlobalWorkSize
+        nullptr,         // pNewLocalWorkSize
+    };
+
+    // Update command to command-buffer that doesn't have updatable set should
+    // be an error
+    ur_result_t result =
+        urCommandBufferUpdateKernelLaunchExp(test_command_handle, &update_desc);
+    EXPECT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result);
+
+    // EXPECT_* is used above so that these releases always run.
+    if (test_command_handle) {
+        EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(test_command_handle));
+    }
+    if (test_cmd_buf_handle) {
+        EXPECT_SUCCESS(urCommandBufferReleaseExp(test_cmd_buf_handle));
+    }
+}
diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp
new file mode 100644
index 0000000000..e5631f9176
--- /dev/null
+++ b/test/conformance/exp_command_buffer/ndrange_update.cpp
@@ -0,0 +1,248 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+#include <array>
+#include <cstring>
+
+// Test updating a command-buffer with a single kernel command
+// in a way that changes the NDRange configuration.
+struct NDRangeUpdateTest
+    : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest {
+    // Builds the `indexers_usm` kernel, records it as a 3-dimensional command
+    // and finalizes the command-buffer. Skips when shared USM is unavailable.
+    void SetUp() override {
+        program_name = "indexers_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::SetUp());
+
+        ur_device_usm_access_capability_flags_t shared_usm_flags;
+        ASSERT_SUCCESS(
+            uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+        if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+            GTEST_SKIP() << "Shared USM is not supported.";
+        }
+
+        // Allocate a USM pointer for use as kernel output at index 0
+        ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                        allocation_size, &shared_ptr));
+        ASSERT_NE(shared_ptr, nullptr);
+        std::memset(shared_ptr, 0, allocation_size);
+
+        ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr));
+
+        // Add a 3 dimension kernel command to command-buffer and close
+        // command-buffer
+        ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+            updatable_cmd_buf_handle, kernel, n_dimensions,
+            global_offset.data(), global_size.data(), local_size.data(), 0,
+            nullptr, nullptr, &command_handle));
+        ASSERT_NE(command_handle, nullptr);
+
+        ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
+    }
+
+    // For each work-item the kernel prints the global id and local id in each
+    // of the 3 dimensions to an offset in the output based on global linear
+    // id.
+    //
+    // The arrays are taken by value because they are reordered locally.
+    void Validate(std::array<size_t, 3> global_size,
+                  std::array<size_t, 3> local_size,
+                  std::array<size_t, 3> global_offset) {
+        // DPC++ swaps the X & Z dimension for 3 Dimensional kernels
+        // between those set by user and SPIR-V builtins.
+        // See `ReverseRangeDimensionsForKernel()` in commands.cpp
+
+        std::swap(global_size[0], global_size[2]);
+        std::swap(local_size[0], local_size[2]);
+        std::swap(global_offset[0], global_offset[2]);
+
+        // Verify global ID and local ID of each work item
+        for (size_t x = 0; x < global_size[0]; x++) {
+            for (size_t y = 0; y < global_size[1]; y++) {
+                for (size_t z = 0; z < global_size[2]; z++) {
+                    // z is the fastest varying index, so the stride of y is
+                    // the z extent and the stride of x is the y extent times
+                    // the z extent.
+                    const size_t global_linear_id =
+                        z + (y * global_size[2]) +
+                        (x * global_size[1] * global_size[2]);
+                    int *wi_ptr = (int *)shared_ptr +
+                                  (elements_per_id * global_linear_id);
+
+                    const int global_id_x = wi_ptr[0];
+                    const int global_id_y = wi_ptr[1];
+                    const int global_id_z = wi_ptr[2];
+
+                    EXPECT_EQ(global_id_x, x + global_offset[0]);
+                    EXPECT_EQ(global_id_y, y + global_offset[1]);
+                    EXPECT_EQ(global_id_z, z + global_offset[2]);
+
+                    const int local_id_x = wi_ptr[3];
+                    const int local_id_y = wi_ptr[4];
+                    const int local_id_z = wi_ptr[5];
+
+                    EXPECT_EQ(local_id_x, x % local_size[0]);
+                    EXPECT_EQ(local_id_y, y % local_size[1]);
+                    EXPECT_EQ(local_id_z, z % local_size[2]);
+                }
+            }
+        }
+    }
+
+    // Frees the USM allocation and drops the command handle reference before
+    // delegating to the base fixture.
+    void TearDown() override {
+        if (shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
+        }
+
+        if (command_handle) {
+            EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::TearDown());
+    }
+
+    // Each work-item writes 3 global ids followed by 3 local ids.
+    static constexpr size_t elements_per_id = 6;
+    static constexpr size_t n_dimensions = 3;
+    static constexpr std::array<size_t, 3> global_size = {8, 8, 8};
+    static constexpr std::array<size_t, 3> local_size = {1, 2, 2};
+    static constexpr std::array<size_t, 3> global_offset = {0, 4, 4};
+    static constexpr size_t allocation_size = sizeof(int) * elements_per_id *
+                                              global_size[0] * global_size[1] *
+                                              global_size[2];
+    void *shared_ptr = nullptr;
+    ur_exp_command_buffer_command_handle_t command_handle = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(NDRangeUpdateTest);
+
+// Keep the kernel work dimensions as 3, and update local size and global
+// offset.
+TEST_P(NDRangeUpdateTest, Update3D) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate(global_size, local_size, global_offset);
+
+    // Set local size and global offset to update to
+    std::array<size_t, 3> new_local_size = {4, 2, 2};
+    std::array<size_t, 3> new_global_offset = {3, 2, 1};
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,                  // pNext
+        0,                        // numNewMemObjArgs
+        0,                        // numNewPointerArgs
+        0,                        // numNewValueArgs
+        0,                        // numNewExecInfos
+        3,                        // newWorkDim
+        nullptr,                  // pNewMemObjArgList
+        nullptr,                  // pNewPointerArgList
+        nullptr,                  // pNewValueArgList
+        nullptr,                  // pNewExecInfoList
+        new_global_offset.data(), // pNewGlobalWorkOffset
+        nullptr,                  // pNewGlobalWorkSize
+        new_local_size.data(),    // pNewLocalWorkSize
+    };
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    Validate(global_size, new_local_size, new_global_offset);
+}
+
+// Update the kernel work dimensions to 2, and update global size, local size,
+// and global offset to new values.
+TEST_P(NDRangeUpdateTest, Update2D) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate(global_size, local_size, global_offset);
+
+    // Set ND-Range configuration to update to
+    std::array<size_t, 3> new_global_size = {6, 6, 1};
+    std::array<size_t, 3> new_local_size = {3, 3, 1};
+    std::array<size_t, 3> new_global_offset = {3, 3, 0};
+
+    // Set dimensions as 2
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,                  // pNext
+        0,                        // numNewMemObjArgs
+        0,                        // numNewPointerArgs
+        0,                        // numNewValueArgs
+        0,                        // numNewExecInfos
+        2,                        // newWorkDim
+        nullptr,                  // pNewMemObjArgList
+        nullptr,                  // pNewPointerArgList
+        nullptr,                  // pNewValueArgList
+        nullptr,                  // pNewExecInfoList
+        new_global_offset.data(), // pNewGlobalWorkOffset
+        new_global_size.data(),   // pNewGlobalWorkSize
+        new_local_size.data(),    // pNewLocalWorkSize
+    };
+
+    // Reset output to remove old values which will no longer have a
+    // work-item to overwrite them
+    std::memset(shared_ptr, 0, allocation_size);
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    Validate(new_global_size, new_local_size, new_global_offset);
+}
+
+// Update the kernel work dimensions to 1, and check that previously
+// set global size, local size, and global offset update accordingly.
+TEST_P(NDRangeUpdateTest, Update1D) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate(global_size, local_size, global_offset);
+
+    // Set dimensions to 1; no new sizes or offsets are supplied
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr, // pNext
+        0,       // numNewMemObjArgs
+        0,       // numNewPointerArgs
+        0,       // numNewValueArgs
+        0,       // numNewExecInfos
+        1,       // newWorkDim
+        nullptr, // pNewMemObjArgList
+        nullptr, // pNewPointerArgList
+        nullptr, // pNewValueArgList
+        nullptr, // pNewExecInfoList
+        nullptr, // pNewGlobalWorkOffset
+        nullptr, // pNewGlobalWorkSize
+        nullptr, // pNewLocalWorkSize
+    };
+
+    // Reset output to remove old values which will no longer have a
+    // work-item to overwrite them
+    std::memset(shared_ptr, 0, allocation_size);
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly: only the first dimension of the
+    // original configuration is expected to remain in effect.
+    std::array<size_t, 3> new_global_size = {global_size[0], 1, 1};
+    std::array<size_t, 3> new_local_size = {local_size[0], 1, 1};
+    std::array<size_t, 3> new_global_offset = {global_offset[0], 0, 0};
+    Validate(new_global_size, new_local_size, new_global_offset);
+}
diff --git a/test/conformance/exp_command_buffer/release.cpp b/test/conformance/exp_command_buffer/release.cpp
new file mode 100644
index 0000000000..47b6124f74
--- /dev/null
+++ b/test/conformance/exp_command_buffer/release.cpp
@@ -0,0 +1,74 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+
+using urCommandBufferReleaseExpTest =
+    uur::command_buffer::urCommandBufferExpTest;
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferReleaseExpTest);
+
+// Releasing a command-buffer must lower its reference count.
+TEST_P(urCommandBufferReleaseExpTest, Success) {
+    // Take an extra reference first so the release below does not destroy
+    // the handle that is queried afterwards.
+    ASSERT_SUCCESS(urCommandBufferRetainExp(cmd_buf_handle));
+
+    uint32_t prev_ref_count = 0;
+    ASSERT_SUCCESS(
+        uur::GetObjectReferenceCount(cmd_buf_handle, prev_ref_count));
+
+    ASSERT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
+
+    uint32_t ref_count = 0;
+    ASSERT_SUCCESS(uur::GetObjectReferenceCount(cmd_buf_handle, ref_count));
+
+    ASSERT_GT(prev_ref_count, ref_count);
+}
+
+TEST_P(urCommandBufferReleaseExpTest, InvalidNullHandle) {
+    ASSERT_EQ_RESULT(urCommandBufferReleaseExp(nullptr),
+                     UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
+
+using urCommandBufferReleaseCommandExpTest =
+    uur::command_buffer::urCommandBufferCommandExpTest;
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferReleaseCommandExpTest);
+
+// Releasing a command handle must lower its reference count.
+TEST_P(urCommandBufferReleaseCommandExpTest, Success) {
+    // Extra reference keeps the handle queryable after the release.
+    ASSERT_SUCCESS(urCommandBufferRetainCommandExp(command_handle));
+
+    uint32_t prev_ref_count = 0;
+    ASSERT_SUCCESS(
+        uur::GetObjectReferenceCount(command_handle, prev_ref_count));
+
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+
+    uint32_t ref_count = 0;
+    ASSERT_SUCCESS(uur::GetObjectReferenceCount(command_handle, ref_count));
+
+    ASSERT_GT(prev_ref_count, ref_count);
+}
+
+// A command handle can still be released after its command-buffer has been
+// released. Handles are nulled once released so the test does not hand an
+// already-released handle on to later cleanup.
+TEST_P(urCommandBufferReleaseCommandExpTest, ReleaseCmdBufBeforeHandle) {
+    ASSERT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle));
+    updatable_cmd_buf_handle = nullptr;
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+    command_handle = nullptr;
+}
+
+// Release one command handle before the command-buffer and a second one
+// after it.
+TEST_P(urCommandBufferReleaseCommandExpTest, ReleaseCmdBufMultipleHandles) {
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+    command_handle = nullptr;
+
+    ASSERT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle));
+    updatable_cmd_buf_handle = nullptr;
+
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle_2));
+    command_handle_2 = nullptr;
+}
+
+TEST_P(urCommandBufferReleaseCommandExpTest, InvalidNullHandle) {
+    ASSERT_EQ_RESULT(urCommandBufferReleaseCommandExp(nullptr),
+                     UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
diff --git a/test/conformance/exp_command_buffer/retain.cpp b/test/conformance/exp_command_buffer/retain.cpp
new file mode 100644
index 0000000000..f2b716fa23
--- /dev/null
+++ b/test/conformance/exp_command_buffer/retain.cpp
@@ -0,0 +1,56 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+
+using urCommandBufferRetainExpTest =
+    uur::command_buffer::urCommandBufferExpTest;
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferRetainExpTest);
+
+// Retaining a command-buffer must raise its reference count.
+TEST_P(urCommandBufferRetainExpTest, Success) {
+    uint32_t prev_ref_count = 0;
+    ASSERT_SUCCESS(
+        uur::GetObjectReferenceCount(cmd_buf_handle, prev_ref_count));
+
+    ASSERT_SUCCESS(urCommandBufferRetainExp(cmd_buf_handle));
+
+    uint32_t ref_count = 0;
+    ASSERT_SUCCESS(uur::GetObjectReferenceCount(cmd_buf_handle, ref_count));
+
+    ASSERT_LT(prev_ref_count, ref_count);
+
+    // Balance the retain taken by this test.
+    ASSERT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
+}
+
+TEST_P(urCommandBufferRetainExpTest, InvalidNullHandle) {
+    ASSERT_EQ_RESULT(urCommandBufferRetainExp(nullptr),
+                     UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
+
+using urCommandBufferRetainCommandExpTest =
+    uur::command_buffer::urCommandBufferCommandExpTest;
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferRetainCommandExpTest);
+
+// Retaining a command handle must raise its reference count.
+TEST_P(urCommandBufferRetainCommandExpTest, Success) {
+    uint32_t prev_ref_count = 0;
+    ASSERT_SUCCESS(
+        uur::GetObjectReferenceCount(command_handle, prev_ref_count));
+
+    ASSERT_SUCCESS(urCommandBufferRetainCommandExp(command_handle));
+
+    uint32_t ref_count = 0;
+    ASSERT_SUCCESS(uur::GetObjectReferenceCount(command_handle, ref_count));
+
+    ASSERT_LT(prev_ref_count, ref_count);
+
+    // Balance the retain taken by this test.
+    ASSERT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+}
+
+TEST_P(urCommandBufferRetainCommandExpTest, InvalidNullHandle) {
+    ASSERT_EQ_RESULT(urCommandBufferRetainCommandExp(nullptr),
+                     UR_RESULT_ERROR_INVALID_NULL_HANDLE);
+}
diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp
new file mode 100644
index 0000000000..7e6cab6ee3
--- /dev/null
+++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp
@@ -0,0 +1,378 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+#include <array>
+#include <cstring>
+
+// Test that updating a command-buffer with a single kernel command
+// taking USM arguments works correctly.
+struct USMFillCommandTest
+    : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest {
+    // Builds the `fill_usm` kernel, records it once and finalizes the
+    // command-buffer. Skips the test when shared USM is unavailable.
+    void SetUp() override {
+        program_name = "fill_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::SetUp());
+
+        ur_device_usm_access_capability_flags_t shared_usm_flags;
+        ASSERT_SUCCESS(
+            uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+        if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+            GTEST_SKIP() << "Shared USM is not supported.";
+        }
+
+        // Allocate USM pointer to fill
+        ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                        allocation_size, &shared_ptr));
+        ASSERT_NE(shared_ptr, nullptr);
+        std::memset(shared_ptr, 0, allocation_size);
+
+        // Index 0 is output
+        ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr));
+        // Index 1 is input scalar
+        ASSERT_SUCCESS(
+            urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val));
+
+        // Append kernel command to command-buffer and close command-buffer
+        ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+            updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset,
+            &global_size, &local_size, 0, nullptr, nullptr, &command_handle));
+        ASSERT_NE(command_handle, nullptr);
+
+        ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
+    }
+
+    // Asserts that the first `length` elements of `pointer` all equal `val`.
+    void Validate(uint32_t *pointer, size_t length, uint32_t val) {
+        for (size_t i = 0; i < length; i++) {
+            ASSERT_EQ(pointer[i], val);
+        }
+    }
+
+    // Frees both USM allocations and drops the command handle reference
+    // before delegating to the base fixture.
+    void TearDown() override {
+        if (shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
+        }
+
+        if (new_shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr));
+        }
+
+        if (command_handle) {
+            EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::TearDown());
+    }
+
+    static constexpr uint32_t val = 42; // initial fill value (arg 1)
+    static constexpr size_t local_size = 4;
+    static constexpr size_t global_size = 32;
+    static constexpr size_t global_offset = 0;
+    static constexpr size_t n_dimensions = 1;
+    static constexpr size_t allocation_size = sizeof(val) * global_size;
+    void *shared_ptr = nullptr;
+    void *new_shared_ptr = nullptr; // allocated on demand by individual tests
+    ur_exp_command_buffer_command_handle_t command_handle = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMFillCommandTest);
+
+// Test using a different global size to fill and larger USM output buffer
+TEST_P(USMFillCommandTest, UpdateParameters) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate((uint32_t *)shared_ptr, global_size, val);
+
+    // Allocate a new USM pointer of larger size
+    size_t new_global_size = 64;
+    const size_t new_allocation_size = sizeof(val) * new_global_size;
+    ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                    new_allocation_size, &new_shared_ptr));
+    ASSERT_NE(new_shared_ptr, nullptr);
+    std::memset(new_shared_ptr, 0, new_allocation_size);
+
+    // Set new USM pointer as kernel output at index 0
+    ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype
+        nullptr,         // pNext
+        0,               // argIndex
+        nullptr,         // pProperties
+        &new_shared_ptr, // pArgValue
+    };
+
+    // Set new value to use for fill at kernel index 1
+    uint32_t new_val = 33;
+    ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype
+        nullptr,         // pNext
+        1,               // argIndex
+        sizeof(new_val), // argSize
+        nullptr,         // pProperties
+        &new_val,        // hArgValue
+    };
+
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,          // pNext
+        0,                // numNewMemObjArgs
+        1,                // numNewPointerArgs
+        1,                // numNewValueArgs
+        0,                // numNewExecInfos
+        0,                // newWorkDim
+        nullptr,          // pNewMemObjArgList
+        &new_output_desc, // pNewPointerArgList
+        &new_input_desc,  // pNewValueArgList
+        nullptr,          // pNewExecInfoList
+        nullptr,          // pNewGlobalWorkOffset
+        &new_global_size, // pNewGlobalWorkSize
+        nullptr,          // pNewLocalWorkSize
+    };
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    Validate((uint32_t *)new_shared_ptr, new_global_size, new_val);
+}
+
+// Test updating the kernel execution info
+TEST_P(USMFillCommandTest, UpdateExecInfo) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    Validate((uint32_t *)shared_ptr, global_size, val);
+
+    ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3];
+
+    // Update indirect access flag
+    bool indirect_access = false;
+    new_exec_info_descs[0] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
+        nullptr,                                 // pNext
+        UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, // propName
+        sizeof(indirect_access),                 // propSize
+        nullptr,                                 // pProperties
+        &indirect_access,                        // pPropValue
+    };
+
+    // Update cache config
+    ur_kernel_cache_config_t cache_config = UR_KERNEL_CACHE_CONFIG_DEFAULT;
+    new_exec_info_descs[1] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
+        nullptr,                          // pNext
+        UR_KERNEL_EXEC_INFO_CACHE_CONFIG, // propName
+        sizeof(cache_config),             // propSize
+        nullptr,                          // pProperties
+        &cache_config,                    // pPropValue
+    };
+
+    // Create a new USM allocation to set indirect access for
+    ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                    allocation_size, &new_shared_ptr));
+    ASSERT_NE(new_shared_ptr, nullptr);
+    // Single pointer passed by address with a size of one pointer.
+    void *pointers = {new_shared_ptr};
+    new_exec_info_descs[2] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
+        nullptr,                      // pNext
+        UR_KERNEL_EXEC_INFO_USM_PTRS, // propName
+        sizeof(pointers),             // propSize
+        nullptr,                      // pProperties
+        &pointers,                    // pPropValue
+    };
+
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,             // pNext
+        0,                   // numNewMemObjArgs
+        0,                   // numNewPointerArgs
+        0,                   // numNewValueArgs
+        3,                   // numNewExecInfos
+        0,                   // newWorkDim
+        nullptr,             // pNewMemObjArgList
+        nullptr,             // pNewPointerArgList
+        nullptr,             // pNewValueArgList
+        new_exec_info_descs, // pNewExecInfoList
+        nullptr,             // pNewGlobalWorkOffset
+        nullptr,             // pNewGlobalWorkSize
+        nullptr,             // pNewLocalWorkSize
+    };
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify results are correct, although exec info modifications should
+    // have no effect on output
+    Validate((uint32_t *)shared_ptr, global_size, val);
+}
+
+// Test updating a command-buffer with multiple USM fill kernel commands
+struct USMMultipleFillCommandTest
+    : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest {
+    // Records `num_kernels` fill commands, each writing `elements` values
+    // into its own slice of one shared allocation, then finalizes the
+    // command-buffer. Skips the test when shared USM is unavailable.
+    void SetUp() override {
+        program_name = "fill_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::SetUp());
+
+        ur_device_usm_access_capability_flags_t shared_usm_flags;
+        ASSERT_SUCCESS(
+            uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+        if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+            GTEST_SKIP() << "Shared USM is not supported.";
+        }
+
+        // Create a single USM allocation which will be used by all kernels
+        // by accessing at pointer offsets
+        ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                        allocation_size, &shared_ptr));
+        ASSERT_NE(shared_ptr, nullptr);
+        std::memset(shared_ptr, 0, allocation_size);
+
+        // Append multiple kernel commands to command-buffer
+        for (size_t k = 0; k < num_kernels; k++) {
+            // Calculate offset into output allocation, and set as
+            // kernel output.
+            void *offset_ptr = (uint32_t *)shared_ptr + (k * elements);
+            ASSERT_SUCCESS(
+                urKernelSetArgPointer(kernel, 0, nullptr, &offset_ptr));
+
+            // Each kernel has a unique fill value
+            uint32_t fill_val = val + k;
+            ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(fill_val),
+                                               nullptr, &fill_val));
+
+            // Append kernel and store returned handle
+            ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+                updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset,
+                &elements, &local_size, 0, nullptr, nullptr,
+                &command_handles[k]));
+            ASSERT_NE(command_handles[k], nullptr);
+        }
+
+        ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
+    }
+
+    // Asserts that the first `length` elements of `pointer` all equal `val`.
+    void Validate(uint32_t *pointer, size_t length, uint32_t val) {
+        for (size_t i = 0; i < length; i++) {
+            ASSERT_EQ(pointer[i], val);
+        }
+    }
+
+    // Frees both USM allocations and releases every command handle that was
+    // obtained in SetUp before delegating to the base fixture.
+    void TearDown() override {
+        if (shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
+        }
+
+        if (new_shared_ptr) {
+            EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr));
+        }
+
+        // Entries stay null when SetUp skipped or failed early.
+        for (auto &handle : command_handles) {
+            if (handle) {
+                EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(handle));
+            }
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::TearDown());
+    }
+
+    static constexpr uint32_t val = 42; // fill value of the first kernel
+    static constexpr size_t local_size = 4;
+    static constexpr size_t global_size = 64;
+    static constexpr size_t global_offset = 0;
+    static constexpr size_t n_dimensions = 1;
+    static constexpr size_t allocation_size = sizeof(val) * global_size;
+    static constexpr size_t num_kernels = 8;
+    // Number of output elements written by each kernel command.
+    static constexpr size_t elements = global_size / num_kernels;
+
+    void *shared_ptr = nullptr;
+    void *new_shared_ptr = nullptr;
+    // Value-initialized so TearDown can tell which handles were created.
+    std::array<ur_exp_command_buffer_command_handle_t, num_kernels>
+        command_handles{};
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultipleFillCommandTest);
+
+// Test updating all the kernel commands in the command-buffer
+TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    uint32_t *output = (uint32_t *)shared_ptr;
+    for (size_t i = 0; i < global_size; i++) {
+        // Kernel k filled elements [k * elements, (k + 1) * elements)
+        const uint32_t expected = val + (i / elements);
+        ASSERT_EQ(expected, output[i]);
+    }
+
+    // Create a new USM allocation to update kernel outputs to
+    ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                    allocation_size, &new_shared_ptr));
+    ASSERT_NE(new_shared_ptr, nullptr);
+    std::memset(new_shared_ptr, 0, allocation_size);
+
+    // Update each kernel in the command-buffer.
+    uint32_t new_val = 33;
+    for (size_t k = 0; k < num_kernels; k++) {
+        // Update output pointer to an offset into new USM allocation
+        void *offset_ptr = (uint32_t *)new_shared_ptr + (k * elements);
+        ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = {
+            UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype
+            nullptr,     // pNext
+            0,           // argIndex
+            nullptr,     // pProperties
+            &offset_ptr, // pArgValue
+        };
+
+        // Update fill value
+        uint32_t new_fill_val = new_val + k;
+        ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = {
+            UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype
+            nullptr,              // pNext
+            1,                    // argIndex
+            sizeof(new_fill_val), // argSize
+            nullptr,              // pProperties
+            &new_fill_val,        // hArgValue
+        };
+
+        ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+            UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+            nullptr,          // pNext
+            0,                // numNewMemObjArgs
+            1,                // numNewPointerArgs
+            1,                // numNewValueArgs
+            0,                // numNewExecInfos
+            0,                // newWorkDim
+            nullptr,          // pNewMemObjArgList
+            &new_output_desc, // pNewPointerArgList
+            &new_input_desc,  // pNewValueArgList
+            nullptr,          // pNewExecInfoList
+            nullptr,          // pNewGlobalWorkOffset
+            nullptr,          // pNewGlobalWorkSize
+            nullptr,          // pNewLocalWorkSize
+        };
+
+        ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handles[k],
+                                                            &update_desc));
+    }
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    uint32_t *updated_output = (uint32_t *)new_shared_ptr;
+    for (size_t i = 0; i < global_size; i++) {
+        uint32_t expected = new_val + (i / elements);
+        ASSERT_EQ(expected, updated_output[i]) << i;
+    }
+}
diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp
new file mode 100644
index 0000000000..b3f9f93fe1
--- /dev/null
+++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp
@@ -0,0 +1,164 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "fixtures.h"
+#include <array>
+#include <cstring>
+#include <vector>
+
+// Test that updating a command-buffer with a single kernel command
+// taking USM & scalar arguments works correctly.
+struct USMSaxpyKernelTest
+    : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest {
+    // Builds the `saxpy_usm` kernel, fills every shared allocation with a
+    // generated pattern, records the kernel once and finalizes the
+    // command-buffer. Skips the test when shared USM is unavailable.
+    void SetUp() override {
+        program_name = "saxpy_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::SetUp());
+
+        ur_device_usm_access_capability_flags_t shared_usm_flags;
+        ASSERT_SUCCESS(
+            uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
+        if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+            GTEST_SKIP() << "Shared USM is not supported.";
+        }
+
+        const size_t allocation_size = sizeof(uint32_t) * global_size;
+        for (auto &shared_ptr : shared_ptrs) {
+            ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
+                                            allocation_size, &shared_ptr));
+            ASSERT_NE(shared_ptr, nullptr);
+
+            // One pattern byte per allocated byte.
+            std::vector<uint8_t> pattern(allocation_size);
+            uur::generateMemFillPattern(pattern);
+            std::memcpy(shared_ptr, pattern.data(), allocation_size);
+        }
+
+        // Index 0 is output
+        ASSERT_SUCCESS(
+            urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptrs[0]));
+        // Index 1 is A
+        ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A));
+        // Index 2 is X
+        ASSERT_SUCCESS(
+            urKernelSetArgPointer(kernel, 2, nullptr, &shared_ptrs[1]));
+        // Index 3 is Y
+        ASSERT_SUCCESS(
+            urKernelSetArgPointer(kernel, 3, nullptr, &shared_ptrs[2]));
+
+        // Append kernel command to command-buffer and close command-buffer
+        ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(
+            updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset,
+            &global_size, &local_size, 0, nullptr, nullptr, &command_handle));
+        ASSERT_NE(command_handle, nullptr);
+
+        ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
+    }
+
+    // Asserts output[i] == A * X[i] + Y[i] for the first `length` elements.
+    void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A,
+                  size_t length) {
+        for (size_t i = 0; i < length; i++) {
+            uint32_t result = A * X[i] + Y[i];
+            ASSERT_EQ(result, output[i]);
+        }
+    }
+
+    // Frees every USM allocation and drops the command handle reference
+    // before delegating to the base fixture.
+    void TearDown() override {
+        for (auto &shared_ptr : shared_ptrs) {
+            if (shared_ptr) {
+                EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
+            }
+        }
+
+        if (command_handle) {
+            EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle));
+        }
+
+        UUR_RETURN_ON_FATAL_FAILURE(
+            urUpdatableCommandBufferExpExecutionTest::TearDown());
+    }
+
+    static constexpr size_t local_size = 4;
+    static constexpr size_t global_size = 32;
+    static constexpr size_t global_offset = 0;
+    static constexpr size_t n_dimensions = 1;
+    static constexpr uint32_t A = 42;
+    // [0] output, [1] X, [2] Y, [3] replacement X, [4] replacement Y
+    std::array<void *, 5> shared_ptrs = {nullptr, nullptr, nullptr, nullptr,
+                                         nullptr};
+    ur_exp_command_buffer_command_handle_t command_handle = nullptr;
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMSaxpyKernelTest);
+
+// Replace both input pointers and the scalar A, then check the recomputed
+// output.
+TEST_P(USMSaxpyKernelTest, UpdateParameters) {
+    // Run command-buffer prior to update and verify output
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    uint32_t *output = (uint32_t *)shared_ptrs[0];
+    uint32_t *X = (uint32_t *)shared_ptrs[1];
+    uint32_t *Y = (uint32_t *)shared_ptrs[2];
+    Validate(output, X, Y, A, global_size);
+
+    // Update inputs
+    ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2];
+
+    // New X at index 2
+    new_input_descs[0] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype
+        nullptr,         // pNext
+        2,               // argIndex
+        nullptr,         // pProperties
+        &shared_ptrs[3], // pArgValue
+    };
+
+    // New Y at index 3
+    new_input_descs[1] = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype
+        nullptr,         // pNext
+        3,               // argIndex
+        nullptr,         // pProperties
+        &shared_ptrs[4], // pArgValue
+    };
+
+    // New A at index 1
+    uint32_t new_A = 33;
+    ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype
+        nullptr,       // pNext
+        1,             // argIndex
+        sizeof(new_A), // argSize
+        nullptr,       // pProperties
+        &new_A,        // hArgValue
+    };
+
+    // Update kernel inputs
+    ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {
+        UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
+        nullptr,         // pNext
+        0,               // numNewMemObjArgs
+        2,               // numNewPointerArgs
+        1,               // numNewValueArgs
+        0,               // numNewExecInfos
+        0,               // newWorkDim
+        nullptr,         // pNewMemObjArgList
+        new_input_descs, // pNewPointerArgList
+        &new_A_desc,     // pNewValueArgList
+        nullptr,         // pNewExecInfoList
+        nullptr,         // pNewGlobalWorkOffset
+        nullptr,         // pNewGlobalWorkSize
+        nullptr,         // pNewLocalWorkSize
+    };
+
+    // Update kernel and enqueue command-buffer again
+    ASSERT_SUCCESS(
+        urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc));
+    ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
+                                             nullptr, nullptr));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+
+    // Verify that update occurred correctly
+    uint32_t *new_output = (uint32_t *)shared_ptrs[0];
+    uint32_t *new_X = (uint32_t *)shared_ptrs[3];
+    uint32_t *new_Y = (uint32_t *)shared_ptrs[4];
+    Validate(new_output, new_X, new_Y, new_A, global_size);
+}
diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h
index 4b7649559f..79620e4b11 100644
--- a/test/conformance/testing/include/uur/utils.h
+++ b/test/conformance/testing/include/uur/utils.h
@@ -118,6 +118,21 @@ auto GetPoolInfo =
         return GetInfo(pool, info, urUSMPoolGetInfo, out_value);
     };
 
+template <class T>
+auto GetCommandBufferInfo = [](ur_exp_command_buffer_handle_t cmd_buf,
+                               ur_exp_command_buffer_info_t info,
+                               T &out_value) {
+    return GetInfo(cmd_buf, info, urCommandBufferGetInfoExp, out_value);
+};
+
+template <class T>
+auto GetCommandBufferCommandInfo =
+    [](ur_exp_command_buffer_command_handle_t command,
+       ur_exp_command_buffer_command_info_t info, T &out_value) {
+        return GetInfo(command, info, urCommandBufferCommandGetInfoExp,
+                       out_value);
+    };
+
 template
 ur_result_t GetObjectReferenceCount(T object, uint32_t &out_ref_count) {
     if constexpr (std::is_same_v) {
@@ -152,6 +167,16
@@ ur_result_t GetObjectReferenceCount(T object, uint32_t &out_ref_count) { return GetPoolInfo(object, UR_USM_POOL_INFO_REFERENCE_COUNT, out_ref_count); } + if constexpr (std::is_same_v) { + return GetCommandBufferInfo( + object, UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT, out_ref_count); + } + if constexpr (std::is_same_v) { + return GetCommandBufferCommandInfo( + object, UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT, + out_ref_count); + } + return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index ff024978ca..15894cafb8 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -329,6 +329,12 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, printDeviceInfo(hDevice, UR_DEVICE_INFO_COMPOSITE_DEVICE); std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP); std::cout << prefix;