diff --git a/include/ur_api.h b/include/ur_api.h index 5c9c7af5da..41c0054c04 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -196,6 +196,7 @@ typedef enum ur_function_t { UR_FUNCTION_ADAPTER_RETAIN = 179, ///< Enumerator for ::urAdapterRetain UR_FUNCTION_ADAPTER_GET_LAST_ERROR = 180, ///< Enumerator for ::urAdapterGetLastError UR_FUNCTION_ADAPTER_GET_INFO = 181, ///< Enumerator for ::urAdapterGetInfo + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 182, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp UR_FUNCTION_PROGRAM_BUILD_EXP = 197, ///< Enumerator for ::urProgramBuildExp UR_FUNCTION_PROGRAM_COMPILE_EXP = 198, ///< Enumerator for ::urProgramCompileExp UR_FUNCTION_PROGRAM_LINK_EXP = 199, ///< Enumerator for ::urProgramLinkExp @@ -224,48 +225,53 @@ typedef enum ur_function_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Defines structure types typedef enum ur_structure_type_t { - UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t - UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t - UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t - UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t - UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t - UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t - UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t - UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t - UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t - UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t - UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t - UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t - UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t - UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t - UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t - UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t - UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t - UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t - UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t - UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t - UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t - UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t - UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t - UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t - UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t - UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t - UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t - UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t - UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t - UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t - UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t - UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t - UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t + UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES = 0, ///< ::ur_context_properties_t + UR_STRUCTURE_TYPE_IMAGE_DESC = 1, ///< ::ur_image_desc_t + UR_STRUCTURE_TYPE_BUFFER_PROPERTIES = 2, ///< ::ur_buffer_properties_t + UR_STRUCTURE_TYPE_BUFFER_REGION = 3, ///< ::ur_buffer_region_t + UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES = 4, ///< ::ur_buffer_channel_properties_t + UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES = 5, ///< ::ur_buffer_alloc_location_properties_t + UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES = 6, ///< ::ur_program_properties_t + UR_STRUCTURE_TYPE_USM_DESC = 7, ///< ::ur_usm_desc_t + UR_STRUCTURE_TYPE_USM_HOST_DESC = 8, ///< ::ur_usm_host_desc_t + UR_STRUCTURE_TYPE_USM_DEVICE_DESC = 9, ///< ::ur_usm_device_desc_t + UR_STRUCTURE_TYPE_USM_POOL_DESC = 10, ///< ::ur_usm_pool_desc_t + UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC = 11, ///< ::ur_usm_pool_limits_desc_t + UR_STRUCTURE_TYPE_DEVICE_BINARY = 12, ///< ::ur_device_binary_t + UR_STRUCTURE_TYPE_SAMPLER_DESC = 13, ///< ::ur_sampler_desc_t + UR_STRUCTURE_TYPE_QUEUE_PROPERTIES = 14, ///< ::ur_queue_properties_t + UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES = 15, ///< ::ur_queue_index_properties_t + UR_STRUCTURE_TYPE_CONTEXT_NATIVE_PROPERTIES = 16, ///< ::ur_context_native_properties_t + UR_STRUCTURE_TYPE_KERNEL_NATIVE_PROPERTIES = 17, ///< ::ur_kernel_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_PROPERTIES = 18, ///< ::ur_queue_native_properties_t + UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES = 19, ///< ::ur_mem_native_properties_t + UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES = 20, ///< ::ur_event_native_properties_t + UR_STRUCTURE_TYPE_PLATFORM_NATIVE_PROPERTIES = 21, ///< ::ur_platform_native_properties_t + UR_STRUCTURE_TYPE_DEVICE_NATIVE_PROPERTIES = 22, ///< ::ur_device_native_properties_t + UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t + UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t + UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES = 27, ///< ::ur_kernel_arg_mem_obj_properties_t + UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES = 28, ///< ::ur_physical_mem_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_POINTER_PROPERTIES = 29, ///< ::ur_kernel_arg_pointer_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES = 30, ///< ::ur_kernel_arg_sampler_properties_t + UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t + UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t + UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC = 0x1001, ///< ::ur_exp_command_buffer_update_kernel_launch_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC = 0x1002, ///< ::ur_exp_command_buffer_update_memobj_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC = 0x1003, ///< ::ur_exp_command_buffer_update_pointer_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC = 0x1004, ///< ::ur_exp_command_buffer_update_value_arg_desc_t + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC = 0x1005, ///< ::ur_exp_command_buffer_update_exec_info_desc_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t + UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t + UR_STRUCTURE_TYPE_EXP_INTEROP_SEMAPHORE_DESC = 0x2002, ///< ::ur_exp_interop_semaphore_desc_t + UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t + UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t + UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -483,6 +489,7 @@ typedef enum ur_result_t { UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000, ///< Invalid Command-Buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001, ///< Sync point is not valid for the command-buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002, ///< Sync point wait list is invalid + UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP = 0x1003, ///< Handle to command-buffer command is invalid UR_RESULT_ERROR_UNKNOWN = 0x7ffffffe, ///< Unknown or internal error /// @cond UR_RESULT_FORCE_UINT32 = 0x7fffffff @@ -1530,6 +1537,10 @@ typedef enum ur_device_info_t { ///< version than older devices. UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT = 114, ///< [::ur_bool_t] return true if the device supports virtual memory. UR_DEVICE_INFO_ESIMD_SUPPORT = 115, ///< [::ur_bool_t] return true if the device supports ESIMD. + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of + ///< command-buffers. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel + ///< commands in a command-buffer. UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP = 0x2001, ///< [::ur_bool_t] returns true if the device supports the creation of @@ -7762,9 +7773,90 @@ typedef struct ur_exp_command_buffer_desc_t { ur_structure_type_t stype; ///< [in] type of this structure, must be ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_bool_t isUpdatable; ///< [in] Commands in a finalized command-buffer can be updated. } ur_exp_command_buffer_desc_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command memobj argument. +typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optinal] Pointer to memory object properties. + ur_mem_handle_t hArgValue; ///< [in][optional] Handle of memory object. + +} ur_exp_command_buffer_update_memobj_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command pointer argument. +typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optinal] Pointer to USM pointer properties. + const void *pArgValue; ///< [in][optional] USM pointer to memory location holding the argument + ///< value. + +} ur_exp_command_buffer_update_pointer_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel command value argument. +typedef struct ur_exp_command_buffer_update_value_arg_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t argIndex; ///< [in] Argument index. + uint32_t argSize; ///< [in] Argument size. + const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optinal] Pointer to value properties. + const void *pArgValue; ///< [in][optional] Argument value representing kernel arg type. + +} ur_exp_command_buffer_update_value_arg_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating kernel command execution info. +typedef struct ur_exp_command_buffer_update_exec_info_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_kernel_exec_info_t propName; ///< [in] Name of execution attribute. + size_t propSize; ///< [in] Size of execution attribute. + const ur_kernel_exec_info_properties_t *pProperties; ///< [in][optional] Pointer to execution info properties. + const void *pPropValue; ///< [in] Pointer to memory location holding the property value. + +} ur_exp_command_buffer_update_exec_info_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Descriptor type for updating a kernel launch command. +typedef struct ur_exp_command_buffer_update_kernel_launch_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + uint32_t numMemobjArgs; ///< [in] Length of pArgMemobjList. + uint32_t numPointerArgs; ///< [in] Length of pArgPointerList. + uint32_t numValueArgs; ///< [in] Length of pArgValueList. + uint32_t numExecInfos; ///< [in] Length of pExecInfoList. + uint32_t workDim; ///< [in] Number of work dimensions in the kernel ND-range, from 1-3. + const ur_exp_command_buffer_update_memobj_arg_desc_t *pArgMemobjList; ///< [in][optional][range(0, numMemobjArgs)] An array describing the new + ///< kernel mem obj arguments for the command. + const ur_exp_command_buffer_update_pointer_arg_desc_t *pArgPointerList; ///< [in][optional][range(0, numPointerArgs)] An array describing the new + ///< kernel pointer arguments for the command. + const ur_exp_command_buffer_update_value_arg_desc_t *pArgValueList; ///< [in][optional][range(0, numValueArgs)]An array describing the new + ///< kernel value arguments for the command. + const ur_exp_command_buffer_update_exec_info_desc_t *pArgExecInfoList; ///< [in][optional] An array describing the execution info objects for the + ///< command. + size_t *pGlobalWorkOffset; ///< [in][optional][range(0, workDim)] Array of workDim unsigned values + ///< that describe the offset used to calculate the global ID. + size_t *pGlobalWorkSize; ///< [in][optional][range(0, workDim)] Array of workDim unsigned values + ///< that describe the number of global work-items. + size_t *pLocalWorkSize; ///< [in][optional][range(0, workDim)] Array of workDim unsigned values + ///< that describe the number of work-items that make up a work-group. If + ///< nullptr, the runtime implementation will choose the work-group size. + +} ur_exp_command_buffer_update_kernel_launch_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief A value that identifies a command inside of a command-buffer, used for /// defining dependencies between commands in the same command-buffer. @@ -7774,11 +7866,15 @@ typedef uint32_t ur_exp_command_buffer_sync_point_t; /// @brief Handle of Command-Buffer object typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of a Command-Buffer command +typedef struct ur_exp_command_buffer_command_handle_t_ *ur_exp_command_buffer_command_handle_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7796,10 +7892,10 @@ typedef struct ur_exp_command_buffer_handle_t_ *ur_exp_command_buffer_handle_t; /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor - ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] pointer to Command-Buffer handle + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. + ur_exp_command_buffer_handle_t *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ); /////////////////////////////////////////////////////////////////////////////// @@ -7817,7 +7913,7 @@ urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// @@ -7836,7 +7932,7 @@ urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// @@ -7855,11 +7951,11 @@ urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( - ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] handle of the command-buffer object + ur_exp_command_buffer_handle_t hCommandBuffer ///< [in] Handle of the command-buffer object. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7886,19 +7982,20 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t *pLocalWorkSize, ///< [in] Local work size to use when executing kernel. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7923,17 +8020,17 @@ urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7972,7 +8069,7 @@ urCommandBufferAppendUSMFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7993,7 +8090,7 @@ urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -8001,11 +8098,11 @@ urCommandBufferAppendMemBufferCopyExp( size_t size, ///< [in] The number of bytes to be copied. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8027,18 +8124,18 @@ urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - const void *pSrc, ///< [in] pointer to host memory where data is to be written from. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + const void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8060,18 +8157,18 @@ urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8092,7 +8189,7 @@ urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t srcOrigin, ///< [in] Origin for the region of data to be copied from the source. @@ -8104,11 +8201,11 @@ urCommandBufferAppendMemBufferCopyRectExp( size_t dstSlicePitch, ///< [in] Slice pitch of the destination memory. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8130,26 +8227,26 @@ urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. + size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. - void *pSrc, ///< [in] pointer to host memory where data is to be written from. + void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8171,25 +8268,25 @@ urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. - size_t bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. - size_t bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. - size_t hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + size_t bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. + size_t bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. + size_t hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. - size_t hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + size_t hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -8225,7 +8322,7 @@ urCommandBufferAppendMemBufferFillExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -8266,7 +8363,7 @@ urCommandBufferAppendUSMPrefetchExp( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -8328,17 +8425,48 @@ urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_queue_handle_t hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + ur_exp_command_buffer_handle_t hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_queue_handle_t hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -10497,6 +10625,7 @@ typedef struct ur_command_buffer_append_kernel_launch_exp_params_t { uint32_t *pnumSyncPointsInWaitList; const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; ur_exp_command_buffer_sync_point_t **ppSyncPoint; + ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_kernel_launch_exp_params_t; /////////////////////////////////////////////////////////////////////////////// @@ -10690,6 +10819,15 @@ typedef struct ur_command_buffer_enqueue_exp_params_t { ur_event_handle_t **pphEvent; } ur_command_buffer_enqueue_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferUpdateKernelLaunchExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_update_kernel_launch_exp_params_t { + ur_exp_command_buffer_command_handle_t *phCommand; + const ur_exp_command_buffer_update_kernel_launch_desc_t **ppUpdateKernelLaunch; +} ur_command_buffer_update_kernel_launch_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 92fc742f72..f8962efab8 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1853,7 +1853,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)( const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *); + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferAppendUSMMemcpyExp @@ -2010,6 +2011,12 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferEnqueueExp_t)( const ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferUpdateKernelLaunchExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferUpdateKernelLaunchExp_t)( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of CommandBufferExp functions pointers typedef struct ur_command_buffer_exp_dditable_t { @@ -2030,6 +2037,7 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferAppendUSMPrefetchExp_t pfnAppendUSMPrefetchExp; ur_pfnCommandBufferAppendUSMAdviseExp_t pfnAppendUSMAdviseExp; ur_pfnCommandBufferEnqueueExp_t pfnEnqueueExp; + ur_pfnCommandBufferUpdateKernelLaunchExp_t pfnUpdateKernelLaunchExp; } ur_command_buffer_exp_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 70e5b9886d..8459a88f1b 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -58,6 +58,8 @@ template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; template @@ -319,6 +321,11 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_memobj_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_pointer_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_value_arg_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_exec_info_desc_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_update_kernel_launch_desc_t params); inline std::ostream &operator<<(std::ostream &os, ur_exp_peer_info_t value); /////////////////////////////////////////////////////////////////////////////// @@ -822,6 +829,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t value) { case UR_FUNCTION_ADAPTER_GET_INFO: os << "UR_FUNCTION_ADAPTER_GET_INFO"; break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP"; + break; case UR_FUNCTION_PROGRAM_BUILD_EXP: os << "UR_FUNCTION_PROGRAM_BUILD_EXP"; break; @@ -999,6 +1009,21 @@ inline std::ostream &operator<<(std::ostream &os, ur_structure_type_t value) { case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC: os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC"; break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC"; + break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: + os << "UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC"; + break; case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES"; break; @@ -1214,6 +1239,31 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { printPtr(os, pstruct); } break; + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC: { + const ur_exp_command_buffer_update_kernel_launch_desc_t *pstruct = (const ur_exp_command_buffer_update_kernel_launch_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC: { + const ur_exp_command_buffer_update_memobj_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_memobj_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC: { + const ur_exp_command_buffer_update_pointer_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_pointer_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC: { + const ur_exp_command_buffer_update_value_arg_desc_t *pstruct = (const ur_exp_command_buffer_update_value_arg_desc_t *)ptr; + printPtr(os, pstruct); + } break; + + case UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC: { + const ur_exp_command_buffer_update_exec_info_desc_t *pstruct = (const ur_exp_command_buffer_update_exec_info_desc_t *)ptr; + printPtr(os, pstruct); + } break; + case UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES: { const ur_exp_sampler_mip_properties_t *pstruct = (const ur_exp_sampler_mip_properties_t *)ptr; printPtr(os, pstruct); @@ -1472,6 +1522,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_result_t value) { case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP"; break; + case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP: + os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP"; + break; case UR_RESULT_ERROR_UNKNOWN: os << "UR_RESULT_ERROR_UNKNOWN"; break; @@ -2401,6 +2454,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_device_info_t value) { case UR_DEVICE_INFO_ESIMD_SUPPORT: os << "UR_DEVICE_INFO_ESIMD_SUPPORT"; break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP"; + break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: os << "UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP"; break; @@ -3809,6 +3868,30 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { const ur_bool_t *tptr = (const ur_bool_t *)ptr; if (sizeof(ur_bool_t) > size) { @@ -9137,6 +9220,279 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu ur::details::printStruct(os, (params.pNext)); + os << ", "; + os << ".isUpdatable = "; + + os << (params.isUpdatable); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_memobj_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_memobj_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_memobj_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".hArgValue = "; + + ur::details::printPtr(os, + (params.hArgValue)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_pointer_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_pointer_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_pointer_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pArgValue = "; + + os << (params.pArgValue); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_value_arg_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_value_arg_desc_t params) { + os << "(struct ur_exp_command_buffer_update_value_arg_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".argIndex = "; + + os << (params.argIndex); + + os << ", "; + os << ".argSize = "; + + os << (params.argSize); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pArgValue = "; + + os << (params.pArgValue); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_exec_info_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_exec_info_desc_t params) { + os << "(struct ur_exp_command_buffer_update_exec_info_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".propName = "; + + os << (params.propName); + + os << ", "; + os << ".propSize = "; + + os << (params.propSize); + + os << ", "; + os << ".pProperties = "; + + os << (params.pProperties); + + os << ", "; + os << ".pPropValue = "; + + os << (params.pPropValue); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_command_buffer_update_kernel_launch_desc_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_buffer_update_kernel_launch_desc_t params) { + os << "(struct ur_exp_command_buffer_update_kernel_launch_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".numMemobjArgs = "; + + os << (params.numMemobjArgs); + + os << ", "; + os << ".numPointerArgs = "; + + os << (params.numPointerArgs); + + os << ", "; + os << ".numValueArgs = "; + + os << (params.numValueArgs); + + os << ", "; + os << ".numExecInfos = "; + + os << (params.numExecInfos); + + os << ", "; + os << ".workDim = "; + + os << (params.workDim); + + os << ", "; + os << ".pArgMemobjList = {"; + for (size_t i = 0; (params.pArgMemobjList) != NULL && i < params.numMemobjArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pArgMemobjList))[i]; + } + os << "}"; + + os << ", "; + os << ".pArgPointerList = {"; + for (size_t i = 0; (params.pArgPointerList) != NULL && i < params.numPointerArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pArgPointerList))[i]; + } + os << "}"; + + os << ", "; + os << ".pArgValueList = {"; + for (size_t i = 0; (params.pArgValueList) != NULL && i < params.numValueArgs; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pArgValueList))[i]; + } + os << "}"; + + os << ", "; + os << ".pArgExecInfoList = "; + + ur::details::printPtr(os, + (params.pArgExecInfoList)); + + os << ", "; + os << ".pGlobalWorkOffset = {"; + for (size_t i = 0; (params.pGlobalWorkOffset) != NULL && i < params.workDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pGlobalWorkOffset))[i]; + } + os << "}"; + + os << ", "; + os << ".pGlobalWorkSize = {"; + for (size_t i = 0; (params.pGlobalWorkSize) != NULL && i < params.workDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pGlobalWorkSize))[i]; + } + os << "}"; + + os << ", "; + os << ".pLocalWorkSize = {"; + for (size_t i = 0; (params.pLocalWorkSize) != NULL && i < params.workDim; ++i) { + if (i != 0) { + os << ", "; + } + + os << ((params.pLocalWorkSize))[i]; + } + os << "}"; + os << "}"; return os; } @@ -14534,6 +14890,12 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppSyncPoint)); + os << ", "; + os << ".phCommand = "; + + ur::details::printPtr(os, + *(params->pphCommand)); + return os; } @@ -15233,6 +15595,26 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_command_buffer_update_kernel_launch_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_command_buffer_update_kernel_launch_exp_params_t *params) { + + os << ".hCommand = "; + + ur::details::printPtr(os, + *(params->phCommand)); + + os << ", "; + os << ".pUpdateKernelLaunch = "; + + ur::details::printPtr(os, + *(params->ppUpdateKernelLaunch)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t type /// @returns @@ -16367,6 +16749,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { os << (const struct ur_command_buffer_enqueue_exp_params_t *)params; } break; + case UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP: { + os << (const struct ur_command_buffer_update_kernel_launch_exp_params_t *)params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..afe081f7e2 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -57,24 +57,29 @@ returned list of supported extensions. ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, 0, nullptr, &returnedSize); - // Retrieve extension string + // Retrieve extension string std::unique_ptr returnedExtensions(new char[returnedSize]); - ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, + ${x}DeviceGetInfo(hDevice, ${X}_DEVICE_INFO_EXTENSIONS, returnedSize, returnedExtensions.get(), nullptr); - + std::string_view ExtensionsString(returnedExtensions.get()); - bool CmdBufferSupport = + bool CmdBufferSupport = ExtensionsString.find(${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP) != std::string::npos; +.. note:: + The ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP device info query exists to + serve the same purpose as ${X}_COMMAND_BUFFER_EXTENSION_STRING_EXP. + Command-Buffer Creation -------------------------------------------------------------------------------- Command-Buffers are tied to a specific ${x}_context_handle_t and ${x}_device_handle_t. ${x}CommandBufferCreateExp optionally takes a descriptor to provide additional properties for how the command-buffer should be -constructed. There are currently no unique members defined for -${x}_exp_command_buffer_desc_t, however they may be added in the future. +constructed. The only unique member defined in ${x}_exp_command_buffer_desc_t +is ``isUpdatable``, which should be set to ``true`` to support :ref:`updating +command-buffer commands`. Command-buffers are reference counted and can be retained and released by calling ${x}CommandBufferRetainExp and ${x}CommandBufferReleaseExp respectively. @@ -89,6 +94,11 @@ However, they differ in that they take a command-buffer handle instead of a queue handle, and the dependencies and return parameters are sync-points instead of event handles. +The entry-point for appending a kernel launch command also returns an optional +handle to the command being appended. Returning this handle does not extend the +lifetime of the parent command-buffer, and using the handle after the +command-buffer has been destroyed is invalid behaviour. + Currently only the following commands are supported: * ${x}CommandBufferAppendKernelLaunchExp @@ -103,9 +113,9 @@ Currently only the following commands are supported: * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp - + It is planned to eventually support any command type from the Core API which can -actually be appended to the equiavalent adapter native constructs. +actually be appended to the equivalent adapter native constructs. Sync-Points -------------------------------------------------------------------------------- @@ -122,15 +132,15 @@ were obtained from. // Append a memcpy with no sync-point dependencies ${x}_exp_command_buffer_sync_point_t syncPoint; - ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, + ${x}CommandBufferAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, 0, nullptr, &syncPoint); - + // Append a kernel launch with syncPoint as a dependency, ignore returned // sync-point - ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, 1, &syncPoint, - nullptr); + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 1, &syncPoint, + nullptr, nullptr); Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -147,6 +157,82 @@ enqueued or executed simultaneously, and submissions may be serialized. ${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, &executionEvent); +Updating Command-Buffers Commands +-------------------------------------------------------------------------------- + +An adapter implementing the command-buffer experimental feature can optionally +support updating the configuration of kernel commands recorded to a +command-buffer. Support for this is reported by returning true in the +${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. + +Updating kernel commands is done by passing the new kernel configuration +to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of +the kernel command to update. Configurations that can be changed are the +parameters to the kernel and the execution ND-Range. + +.. parsed-literal:: + + // Create a command-buffer with update enabled. + ${x}_exp_command_buffer_desc_t desc { + ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, + nullptr, + true // isUpdatable + }; + ${x}_exp_command_buffer_handle_t hCommandBuffer; + ${x}CommandBufferCreateExp(hContext, hDevice, &desc, &hCommandBuffer); + + // Append a kernel command which has two buffer parameters, an input + // and an output. + ${x}_exp_command_buffer_command_handle_t hCommand; + ${x}CommandBufferAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, 0, nullptr, + nullptr, &hCommand); + + // Close the command-buffer before updating + ${x}CommandBufferFinalizeExp(hCommandBuffer); + + // Define kernel argument at index 0 to be a new input buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newInputArg { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext, + 0, // argIndex, + nullptr, // pProperties + newInputBuffer, // hArgValue + }; + + // Define kernel argument at index 1 to be a new output buffer object + ${x}_exp_command_buffer_update_memobj_arg_desc_t newOutputArg { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext, + 1, // argIndex, + nullptr, // pProperties + newOutputBuffer, // hArgValue + }; + + // Define the new configuration of the kernel command + ${x}_exp_command_buffer_update_memobj_arg_desc_t updatedArgs[2] = {newInputArg, newOutputArg}; + ${x}_exp_command_buffer_update_kernel_launch_desc_t update { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 0, // workDim; + new_args, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Perform the update + ${x}CommandBufferUpdateKernelLaunchExp(hCommand, &update); + + API -------------------------------------------------------------------------------- @@ -156,12 +242,21 @@ Macros Enums ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}_device_info_t + * ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP + * ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP * ${x}_result_t * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP + * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP * ${x}_structure_type_t * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + * ${X}_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC * ${x}_command_t * ${X}_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP * ${x}_function_t @@ -182,15 +277,19 @@ Enums * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_PREFETCH_EXP * ${X}_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP - - + * ${X}_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP Types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_exp_command_buffer_desc_t +* ${x}_exp_command_buffer_update_kernel_launch_desc_t +* ${x}_exp_command_buffer_update_memobj_arg_desc_t +* ${x}_exp_command_buffer_update_pointer_arg_desc_t +* ${x}_exp_command_buffer_update_value_arg_desc_t +* ${x}_exp_command_buffer_update_exec_info_desc_t * ${x}_exp_command_buffer_sync_point_t * ${x}_exp_command_buffer_handle_t - +* ${x}_exp_command_buffer_command_handle_t Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -211,6 +310,7 @@ Functions * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp +* ${x}CommandBufferUpdateKernelLaunchExp Changelog -------------------------------------------------------------------------------- @@ -227,6 +327,8 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ +| 1.4 | A function definitions for kernel command update | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- @@ -234,3 +336,4 @@ Contributors * Ben Tracy `ben.tracy@codeplay.com `_ * Ewan Crawford `ewan@codeplay.com `_ * Maxime France-Pillois `maxime.francepillois@codeplay.com `_ +* Aaron Greig `aaron.greig@codeplay.com `_ diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 7d1b686aab..568b4896e0 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -11,6 +11,20 @@ type: header desc: "Intel $OneApi Unified Runtime Experimental APIs for Command-Buffers" ordinal: "99" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support command-buffers." +name: $x_device_info_t +etors: + - name: COMMAND_BUFFER_SUPPORT_EXP + value: "0x1000" + desc: "[$x_bool_t] Returns true if the device supports the use of command-buffers." + - name: COMMAND_BUFFER_UPDATE_SUPPORT_EXP + value: "0x1001" + desc: "[$x_bool_t] Returns true if the device supports updating the kernel commands in a command-buffer." + --- #-------------------------------------------------------------------------- type: enum extend: true @@ -26,6 +40,9 @@ etors: - name: ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP value: "0x1002" desc: "Sync point wait list is invalid" + - name: ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + value: "0x1003" + desc: "Handle to command-buffer command is invalid" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -35,6 +52,22 @@ etors: - name: EXP_COMMAND_BUFFER_DESC desc: $x_exp_command_buffer_desc_t value: "0x1000" + - name: EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC + desc: $x_exp_command_buffer_update_kernel_launch_desc_t + value: "0x1001" + - name: EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC + desc: $x_exp_command_buffer_update_memobj_arg_desc_t + value: "0x1002" + - name: EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC + desc: $x_exp_command_buffer_update_pointer_arg_desc_t + value: "0x1003" + - name: EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC + desc: $x_exp_command_buffer_update_value_arg_desc_t + value: "0x1004" + - name: EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC + desc: $x_exp_command_buffer_update_exec_info_desc_t + value: "0x1005" + --- #-------------------------------------------------------------------------- type: enum extend: true @@ -54,7 +87,120 @@ type: struct desc: "Command-Buffer Descriptor Type" name: $x_exp_command_buffer_desc_t base: $x_base_desc_t -members: [] +members: + - type: $x_bool_t + name: isUpdatable + desc: "[in] Commands in a finalized command-buffer can be updated." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command memobj argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_memobj_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: "const ur_kernel_arg_mem_obj_properties_t *" + name: pProperties + desc: "[in][optinal] Pointer to memory object properties." + - type: $x_mem_handle_t + name: hArgValue + desc: "[in][optional] Handle of memory object." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command pointer argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_pointer_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: "const ur_kernel_arg_pointer_properties_t *" + name: pProperties + desc: "[in][optinal] Pointer to USM pointer properties." + - type: "const void *" + name: pArgValue + desc: "[in][optional] USM pointer to memory location holding the argument value." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel command value argument." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_value_arg_desc_t +members: + - type: uint32_t + name: argIndex + desc: "[in] Argument index." + - type: uint32_t + name: argSize + desc: "[in] Argument size." + - type: "const ur_kernel_arg_value_properties_t *" + name: pProperties + desc: "[in][optinal] Pointer to value properties." + - type: "const void *" + name: pArgValue + desc: "[in][optional] Argument value representing kernel arg type." + +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating kernel command execution info." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_exec_info_desc_t +members: + - type: ur_kernel_exec_info_t + name: propName + desc: "[in] Name of execution attribute." + - type: size_t + name: propSize + desc: "[in] Size of execution attribute." + - type: "const ur_kernel_exec_info_properties_t *" + name: pProperties + desc: "[in][optional] Pointer to execution info properties." + - type: "const void *" + name: pPropValue + desc: "[in] Pointer to memory location holding the property value." + +--- #-------------------------------------------------------------------------- +type: struct +desc: "Descriptor type for updating a kernel launch command." +base: $x_base_desc_t +name: $x_exp_command_buffer_update_kernel_launch_desc_t +members: + - type: uint32_t + name: numMemobjArgs + desc: "[in] Length of pArgMemobjList." + - type: uint32_t + name: numPointerArgs + desc: "[in] Length of pArgPointerList." + - type: uint32_t + name: numValueArgs + desc: "[in] Length of pArgValueList." + - type: uint32_t + name: numExecInfos + desc: "[in] Length of pExecInfoList." + - type: uint32_t + name: workDim + desc: "[in] Number of work dimensions in the kernel ND-range, from 1-3." + - type: "const $x_exp_command_buffer_update_memobj_arg_desc_t*" + name: pArgMemobjList + desc: "[in][optional][range(0, numMemobjArgs)] An array describing the new kernel mem obj arguments for the command." + - type: "const $x_exp_command_buffer_update_pointer_arg_desc_t*" + name: pArgPointerList + desc: "[in][optional][range(0, numPointerArgs)] An array describing the new kernel pointer arguments for the command." + - type: "const $x_exp_command_buffer_update_value_arg_desc_t*" + name: pArgValueList + desc: "[in][optional][range(0, numValueArgs)]An array describing the new kernel value arguments for the command." + - type: "const $x_exp_command_buffer_update_exec_info_desc_t*" + name: pArgExecInfoList + desc: "[in][optional] An array describing the execution info objects for the command." + - type: "size_t*" + name: pGlobalWorkOffset + desc: "[in][optional][range(0, workDim)] Array of workDim unsigned values that describe the offset used to calculate the global ID." + - type: "size_t*" + name: pGlobalWorkSize + desc: "[in][optional][range(0, workDim)] Array of workDim unsigned values that describe the number of global work-items." + - type: "size_t*" + name: pLocalWorkSize + desc: "[in][optional][range(0, workDim)] Array of workDim unsigned values that describe the number of work-items that make up a work-group. If nullptr, the runtime implementation will choose the work-group size." --- #-------------------------------------------------------------------------- type: typedef desc: "A value that identifies a command inside of a command-buffer, used for defining dependencies between commands in the same command-buffer." @@ -67,26 +213,31 @@ desc: "Handle of Command-Buffer object" class: $xCommandBuffer name: "$x_exp_command_buffer_handle_t" --- #-------------------------------------------------------------------------- +type: handle +desc: "Handle of a Command-Buffer command" +class: $xCommandBuffer +name: "$x_exp_command_buffer_command_handle_t" +--- #-------------------------------------------------------------------------- type: function desc: "Create a Command-Buffer object" class: $xCommandBuffer name: CreateExp decl: static details: - - "Create a command-buffer object" + - "Create a command-buffer object." params: - type: $x_context_handle_t name: hContext - desc: "[in] handle of the context object" + desc: "[in] Handle of the context object." - type: $x_device_handle_t name: hDevice - desc: "[in] handle of the device object" + desc: "[in] Handle of the device object." - type: "const $x_exp_command_buffer_desc_t*" name: pCommandBufferDesc - desc: "[in][optional] CommandBuffer descriptor" + desc: "[in][optional] command-buffer descriptor." - type: "$x_exp_command_buffer_handle_t*" name: phCommandBuffer - desc: "[out] pointer to Command-Buffer handle" + desc: "[out] Pointer to command-Buffer handle." returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_DEVICE @@ -100,7 +251,7 @@ name: RetainExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -113,7 +264,7 @@ name: ReleaseExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -126,26 +277,26 @@ name: FinalizeExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a kernel execution command to a command-buffer object" +desc: "Append a kernel execution command to a command-buffer object." class: $xCommandBuffer name: AppendKernelLaunchExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object" + desc: "[in] Handle of the command-buffer object." - type: $x_kernel_handle_t name: hKernel - desc: "[in] kernel to append" + desc: "[in] Kernel to append." - type: uint32_t name: workDim - desc: "[in] dimension of the kernel execution" + desc: "[in] Dimension of the kernel execution." - type: "const size_t*" name: pGlobalWorkOffset desc: "[in] Offset to use when executing kernel." @@ -163,7 +314,11 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." + - type: "$x_exp_command_buffer_command_handle_t*" + name: phCommand + desc: "[out][optional] Handle to this command." + returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_KERNEL @@ -178,13 +333,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM memcpy command to a command-buffer object" +desc: "Append a USM memcpy command to a command-buffer object." class: $xCommandBuffer name: AppendUSMMemcpyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: "void*" name: pDst desc: "[in] Location the data will be copied to." @@ -193,7 +348,7 @@ params: desc: "[in] The data to be copied." - type: "size_t" name: size - desc: "[in] The number of bytes to copy" + desc: "[in] The number of bytes to copy." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -202,7 +357,8 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." + returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_SIZE: @@ -217,7 +373,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM fill command to a command-buffer object" +desc: "Append a USM fill command to a command-buffer object." class: $xCommandBuffer name: AppendUSMFillExp params: @@ -262,13 +418,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory copy command to a command-buffer object" +desc: "Append a memory copy command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferCopyExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -292,7 +448,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -304,25 +460,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory write command to a command-buffer object" +desc: "Append a memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "const void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -331,7 +487,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -343,25 +499,25 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory read command to a command-buffer object" +desc: "Append a memory read command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferReadExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: "size_t" name: offset - desc: "[in] offset in bytes in the buffer object." + desc: "[in] Offset in bytes in the buffer object." - type: "size_t" name: size - desc: "[in] size in bytes of data being written." + desc: "[in] Size in bytes of data being written." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be written to." + desc: "[in] Pointer to host memory where data is to be written to." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -370,7 +526,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -382,13 +538,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory copy command to a command-buffer object" +desc: "Append a rectangular memory copy command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferCopyRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hSrcMem desc: "[in] The data to be copied." @@ -424,7 +580,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -436,16 +592,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory write command to a command-buffer object" +desc: "Append a rectangular memory write command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferWriteRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -457,19 +613,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." - type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being written." + desc: "[in] Length of each 2D slice in bytes in the buffer object being written." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pSrc." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pSrc." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pSrc." - type: "void*" name: pSrc - desc: "[in] pointer to host memory where data is to be written from." + desc: "[in] Pointer to host memory where data is to be written from." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -478,7 +634,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -490,16 +646,16 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a rectangular memory read command to a command-buffer object" +desc: "Append a rectangular memory read command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferReadRectExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_mem_handle_t name: hBuffer - desc: "[in] handle of the buffer object." + desc: "[in] Handle of the buffer object." - type: $x_rect_offset_t name: bufferOffset desc: "[in] 3D offset in the buffer." @@ -511,19 +667,19 @@ params: desc: "[in] 3D rectangular region descriptor: width, height, depth." - type: "size_t" name: bufferRowPitch - desc: "[in] length of each row in bytes in the buffer object." + desc: "[in] Length of each row in bytes in the buffer object." - type: "size_t" name: bufferSlicePitch - desc: "[in] length of each 2D slice in bytes in the buffer object being read." + desc: "[in] Length of each 2D slice in bytes in the buffer object being read." - type: "size_t" name: hostRowPitch - desc: "[in] length of each row in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each row in bytes in the host memory region pointed to by pDst." - type: "size_t" name: hostSlicePitch - desc: "[in] length of each 2D slice in bytes in the host memory region pointed to by pDst." + desc: "[in] Length of each 2D slice in bytes in the host memory region pointed to by pDst." - type: "void*" name: pDst - desc: "[in] pointer to host memory where data is to be read into." + desc: "[in] Pointer to host memory where data is to be read into." - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -532,7 +688,7 @@ params: desc: "[in][optional] A list of sync points that this command depends on." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint - desc: "[out][optional] sync point associated with this command" + desc: "[out][optional] Sync point associated with this command." returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP @@ -544,7 +700,7 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a memory fill command to a command-buffer object" +desc: "Append a memory fill command to a command-buffer object." class: $xCommandBuffer name: AppendMemBufferFillExp params: @@ -588,12 +744,12 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Prefetch command to a command-buffer object" +desc: "Append a USM Prefetch command to a command-buffer object." class: $xCommandBuffer name: AppendUSMPrefetchExp details: - - "Prefetching may not be supported for all devices or allocation types. If memory prefetching - is not supported, the prefetch hint will be ignored." + - "Prefetching may not be supported for all devices or allocation types. If + memory prefetching is not supported, the prefetch hint will be ignored." params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -630,12 +786,13 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- type: function -desc: "Append a USM Advise command to a command-buffer object" +desc: "Append a USM Advise command to a command-buffer object." class: $xCommandBuffer name: AppendUSMAdviseExp details: - - "Not all memory advice hints may be supported for all devices or allocation types. - If a memory advice hint is not supported, it will be ignored." + - "Not all memory advice hints may be supported for all devices or + allocation types. If a memory advice hint is not supported, it will be + ignored." params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer @@ -678,18 +835,18 @@ name: EnqueueExp params: - type: $x_exp_command_buffer_handle_t name: hCommandBuffer - desc: "[in] handle of the command-buffer object." + desc: "[in] Handle of the command-buffer object." - type: $x_queue_handle_t name: hQueue - desc: "[in] the queue to submit this command-buffer for execution." + desc: "[in] The queue to submit this command-buffer for execution." - type: uint32_t name: numEventsInWaitList - desc: "[in] size of the event wait list" + desc: "[in] Size of the event wait list." - type: "const $x_event_handle_t*" name: phEventWaitList desc: | [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the command-buffer execution. - If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. - type: $x_event_handle_t* name: phEvent desc: | @@ -704,3 +861,31 @@ returns: - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Update a kernel launch command." +class: $xCommandBuffer +name: UpdateKernelLaunchExp +params: + - type: $x_exp_command_buffer_command_handle_t + name: hCommand + desc: "[in] Handle of the command-buffer kernel command to update." + - type: "const $x_exp_command_buffer_update_kernel_launch_desc_t*" + name: pUpdateKernelLaunch + desc: "[in] Struct defining how the kernel command is to be updated." + +returns: + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If update functionality is not supported by the device." + - $X_RESULT_ERROR_INVALID_OPERATION: + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX + - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE + - $X_RESULT_ERROR_INVALID_ENUMERATION + - $X_RESULT_ERROR_INVALID_WORK_DIMENSION + - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 6195cd4980..d0dece9e2d 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -502,6 +502,9 @@ etors: - name: ADAPTER_GET_INFO desc: Enumerator for $xAdapterGetInfo value: '181' +- name: COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP + desc: Enumerator for $xCommandBufferUpdateKernelLaunchExp + value: '182' - name: PROGRAM_BUILD_EXP desc: Enumerator for $xProgramBuildExp value: '197' diff --git a/scripts/parse_specs.py b/scripts/parse_specs.py index 332af88cc7..493b6ddc88 100644 --- a/scripts/parse_specs.py +++ b/scripts/parse_specs.py @@ -286,8 +286,8 @@ def __validate_members(d, tags): if not annotation: raise Exception(prefix+"'desc' must start with {'[in]', '[out]', '[in,out]'}") - if type_traits.is_handle(item['type']): - raise Exception(prefix+"'type' must not be '*_handle_t': %s"%item['type']) + #if type_traits.is_handle(item['type']): + # raise Exception(prefix+"'type' must not be '*_handle_t': %s"%item['type']) if item['type'].endswith("flag_t"): raise Exception(prefix+"'type' must not be '*_flag_t': %s"%item['type']) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index a65530a1f1..9e29dd7f5a 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -250,7 +250,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand) { // Preconditions UR_ASSERT(hCommandBuffer->Context == hKernel->getContext(), UR_RESULT_ERROR_INVALID_KERNEL); @@ -324,8 +325,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel->clearLocalSize(); // Get sync point and register the cuNode with it. - *pSyncPoint = - hCommandBuffer->AddSyncPoint(std::make_shared(GraphNode)); + auto NodeSP = std::make_shared(GraphNode); + if (pSyncPoint) { + *pSyncPoint = hCommandBuffer->AddSyncPoint(NodeSP); + } + + *phCommand = hCommandBuffer + ->AddCommandHandle(hKernel, NodeSP, NodeParams, workDim, + pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize) + .get(); } catch (ur_result_t Err) { Result = Err; } @@ -762,3 +771,127 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return Result; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t *pKernelLaunch) { + // Kernel corresponding to the command to update + ur_kernel_handle_t Kernel = hCommand->Kernel; + + // Update pointer arguments to the kernel + uint32_t NumPointerArgs = pKernelLaunch->numPointerArgs; + const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = + pKernelLaunch->pArgPointerList; + for (uint32_t i = 0; i < NumPointerArgs; i++) { + const auto &PointerArgDesc = ArgPointerList[i]; + uint32_t ArgIndex = PointerArgDesc.argIndex; + const void *ArgValue = PointerArgDesc.pArgValue; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + Kernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Update memobj arguments to the kernel + uint32_t NumMemobjArgs = pKernelLaunch->numMemobjArgs; + const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = + pKernelLaunch->pArgMemobjList; + for (uint32_t i = 0; i < NumMemobjArgs; i++) { + const auto &MemobjArgDesc = ArgMemobjList[i]; + uint32_t ArgIndex = MemobjArgDesc.argIndex; + ur_mem_handle_t ArgValue = MemobjArgDesc.hArgValue; + + ur_result_t Result = UR_RESULT_SUCCESS; + try { + if (ArgValue == nullptr) { + Kernel->setKernelArg(ArgIndex, 0, nullptr); + } else { + CUdeviceptr CuPtr = std::get(ArgValue->Mem).get(); + Kernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr); + } + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Update value arguments to the kernel + uint32_t NumValueArgs = pKernelLaunch->numValueArgs; + const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = + pKernelLaunch->pArgValueList; + for (uint32_t i = 0; i < NumValueArgs; i++) { + const auto &ValueArgDesc = ArgValueList[i]; + uint32_t ArgIndex = ValueArgDesc.argIndex; + size_t ArgSize = ValueArgDesc.argSize; + const void *ArgValue = ValueArgDesc.pArgValue; + + ur_result_t Result = UR_RESULT_SUCCESS; + + try { + Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + } catch (ur_result_t Err) { + Result = Err; + return Result; + } + } + + // Set the updated ND range + const uint32_t NewWorkDim = pKernelLaunch->workDim; + if (NewWorkDim != 0) { + UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + hCommand->WorkDim = NewWorkDim; + } + + if (pKernelLaunch->pGlobalWorkOffset) { + hCommand->SetGlobalOffset(pKernelLaunch->pGlobalWorkOffset); + } + + if (pKernelLaunch->pGlobalWorkSize) { + hCommand->SetGlobalSize(pKernelLaunch->pGlobalWorkSize); + } + + if (pKernelLaunch->pLocalWorkSize) { + hCommand->SetLocalSize(pKernelLaunch->pLocalWorkSize); + } + + size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; + size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + size_t *LocalWorkSize = hCommand->LocalWorkSize; + uint32_t WorkDim = hCommand->WorkDim; + + // Set the number of threads per block to the number of threads per warp + // by default unless user has provided a better number + size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; + size_t BlocksPerGrid[3] = {1u, 1u, 1u}; + CUfunction CuFunc = Kernel->get(); + ur_context_handle_t Context = hCommand->CommandBuffer->Context; + ur_device_handle_t Device = hCommand->CommandBuffer->Device; + auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset, + GlobalWorkSize, LocalWorkSize, Kernel, CuFunc, + ThreadsPerBlock, BlocksPerGrid); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + + CUDA_KERNEL_NODE_PARAMS &Params = hCommand->Params; + + Params.func = CuFunc; + Params.gridDimX = BlocksPerGrid[0]; + Params.gridDimY = BlocksPerGrid[1]; + Params.gridDimZ = BlocksPerGrid[2]; + Params.blockDimX = ThreadsPerBlock[0]; + Params.blockDimY = ThreadsPerBlock[1]; + Params.blockDimZ = ThreadsPerBlock[2]; + Params.sharedMemBytes = Kernel->getLocalSize(); + Params.kernelParams = const_cast(Kernel->getArgIndices().data()); + + CUgraphNode Node = *(hCommand->Node); + CUgraphExec CudaGraphExec = hCommand->CommandBuffer->CudaGraphExec; + UR_CHECK_ERROR(cuGraphExecKernelNodeSetParams(CudaGraphExec, Node, &Params)); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 18264410c4..74825870ba 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -175,6 +175,70 @@ static inline const char *getUrResultString(ur_result_t Result) { fprintf(stderr, "UR <--- %s(%s)\n", #Call, getUrResultString(Result)); \ } +// Handle to a kernel command. +// +// Struct that stores all the information related to a kernel command in a +// command-buffer, such that the command can be recreated. When handles can +// be returned from other command types this struct will need refactored. +struct ur_exp_command_buffer_command_handle_t_ { + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, + std::shared_ptr Node, CUDA_KERNEL_NODE_PARAMS Params, + uint32_t WorkDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize) + : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), + Params(Params), WorkDim(WorkDim) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkOffset, pGlobalWorkOffset, CopySize); + std::memcpy(GlobalWorkSize, pGlobalWorkSize, CopySize); + std::memcpy(LocalWorkSize, pLocalWorkSize, CopySize); + + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); + std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); + std::memset(LocalWorkSize + WorkDim, 0, ZeroSize); + } + } + + void SetGlobalOffset(const size_t *pGlobalWorkOffset) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkOffset, pGlobalWorkOffset, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); + } + } + + void SetGlobalSize(const size_t *pGlobalWorkSize) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(GlobalWorkSize, pGlobalWorkSize, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); + } + } + + void SetLocalSize(const size_t *pLocalWorkSize) { + const size_t CopySize = sizeof(size_t) * WorkDim; + std::memcpy(LocalWorkSize, pLocalWorkSize, CopySize); + if (WorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - WorkDim); + std::memset(LocalWorkSize + WorkDim, 0, ZeroSize); + } + } + + ur_exp_command_buffer_handle_t CommandBuffer; + ur_kernel_handle_t Kernel; + std::shared_ptr Node; + CUDA_KERNEL_NODE_PARAMS Params; + + uint32_t WorkDim; + size_t GlobalWorkOffset[3]; + size_t GlobalWorkSize[3]; + size_t LocalWorkSize[3]; +}; + struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext, @@ -202,6 +266,27 @@ struct ur_exp_command_buffer_handle_t_ { return SyncPoint; } + // Creates a UR command handle + // @param[in] Kernel UR kernel associated with this command. + // @param[in] Node CUDA Graph node associated with this command. + // @param[in] Params CUDA Kernel configuration associated with this node. + // @param[in] WorkDim Dimensions of the kernel execution. + // @param[in] GlobalWorkOffset Work item offset of the kernel execution. + // @param[in] GlobalWorkSize Global work size of the kernel execution. + // @param[in] LocalWorkSize Local work size of the kernel execution. + // @return Shared pointer to the created handle. + std::shared_ptr + AddCommandHandle(ur_kernel_handle_t Kernel, std::shared_ptr Node, + const CUDA_KERNEL_NODE_PARAMS &Params, uint32_t WorkDim, + const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, + const size_t *LocalWorkSize) { + + Handles.push_back(std::make_shared( + this, Kernel, Node, Params, WorkDim, GlobalWorkOffset, GlobalWorkSize, + LocalWorkSize)); + return Handles.back(); + } + // UR context associated with this command-buffer ur_context_handle_t Context; // Device associated with this command buffer @@ -222,6 +307,9 @@ struct ur_exp_command_buffer_handle_t_ { // is not enough) ur_exp_command_buffer_sync_point_t NextSyncPoint; + // List of command handles returned to the user. + std::vector> Handles; + // Used when retaining an object. uint32_t incrementReferenceCount() noexcept { return ++RefCount; } // Used when releasing an object. diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 0723cfe4e7..dc83d8741d 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1031,6 +1031,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(true); + default: break; } diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index f31ffe6d87..6a9d17c102 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -294,6 +294,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 54a6fa2f4e..20a7f508fe 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -46,7 +46,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -162,3 +163,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for the HIP adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index e40470f9aa..21e88c4d6a 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -837,6 +837,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ASYNC_BARRIER: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: break; } diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 7707e78425..550818adb4 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -291,6 +291,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index bbe49cb705..d1a7970bf5 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -509,7 +509,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *GlobalWorkSize, const size_t *LocalWorkSize, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, - ur_exp_command_buffer_sync_point_t *SyncPoint) { + ur_exp_command_buffer_sync_point_t *SyncPoint, + ur_exp_command_buffer_command_handle_t *) { // Lock automatically releases when this goes out of scope. std::scoped_lock Lock( Kernel->Mutex, Kernel->Program->Mutex); @@ -986,3 +987,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index abdfd2e541..6c4bdb1045 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -823,6 +823,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(result); } + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + return ReturnValue(true); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: urPrint("Unsupported ParamName in urGetDeviceInfo\n"); urPrint("ParamName=%d(0x%x)\n", ParamName, ParamName); diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 74d0706b31..e6c5ef72eb 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -341,6 +341,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index f13a57f392..32d4a1dc49 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -50,7 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t, ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, const size_t *, uint32_t, const ur_exp_command_buffer_sync_point_t *, - ur_exp_command_buffer_sync_point_t *) { + ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_command_handle_t *) { detail::ur::die("Experimental Command-buffer feature is not " "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -133,3 +134,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( "implemented for the NativeCPU adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t, + const ur_exp_command_buffer_update_kernel_launch_desc_t *) { + detail::ur::die("Experimental Command-buffer feature is not " + "implemented for the NativeCPU adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index 3432ce780e..3a67b30c08 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -304,6 +304,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, CASE_UR_UNSUPPORTED(UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH); case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: return ReturnValue(false); + + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: DIE_NO_IMPLEMENTATION; } diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 9408101927..d884539d3d 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -283,6 +283,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendMemBufferWriteRectExp = urCommandBufferAppendMemBufferWriteRectExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index f016830d11..60988eb22c 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -4441,12 +4441,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4470,7 +4470,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4491,7 +4491,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4512,7 +4512,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4533,9 +4533,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -4546,8 +4546,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4558,9 +4560,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, phCommand); } else { // generic implementation + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + d_context.get()); + } } return result; @@ -4572,16 +4579,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4640,7 +4647,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -4650,8 +4657,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4675,18 +4682,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4710,17 +4717,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4744,7 +4751,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -4761,8 +4768,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4787,31 +4794,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4836,29 +4843,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { ur_result_t result = UR_RESULT_SUCCESS; @@ -4985,15 +4992,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -5017,6 +5023,30 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnUpdateKernelLaunchExp = + d_context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr != pfnUpdateKernelLaunchExp) { + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -5481,6 +5511,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = + driver::urCommandBufferUpdateKernelLaunchExp; + return result; } catch (...) { return exceptionToResult(std::current_exception()); diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 74cdd8a03d..772dc8a4d3 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -104,7 +104,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -356,3 +357,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, + [[maybe_unused]] const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 27577eab39..c437dbc5c8 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -947,6 +947,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ASYNC_BARRIER: { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } + + case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: + return ReturnValue(true); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + return ReturnValue(false); + default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index ac2c33475b..2ca3d561b6 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -301,6 +301,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; return retVal; } diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 402b64d638..b3e7f10f37 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5048,12 +5048,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -5080,7 +5080,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -5105,7 +5105,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -5130,7 +5130,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -5156,9 +5156,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -5169,8 +5169,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -5188,15 +5190,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( &pLocalWorkSize, &numSyncPointsInWaitList, &pSyncPointWaitList, - &pSyncPoint}; + &pSyncPoint, + &phCommand}; uint64_t instance = context.notify_begin( UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", ¶ms); ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); context.notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_EXP, "urCommandBufferAppendKernelLaunchExp", ¶ms, &result, @@ -5209,16 +5212,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -5292,7 +5295,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -5302,8 +5305,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -5341,18 +5344,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -5389,17 +5392,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -5436,7 +5439,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -5453,8 +5456,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -5498,31 +5501,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -5566,29 +5569,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -5772,15 +5775,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -5807,6 +5809,37 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_command_buffer_update_kernel_launch_exp_params_t params = { + &hCommand, &pUpdateKernelLaunch}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", ¶ms); + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + context.notify_end(UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP, + "urCommandBufferUpdateKernelLaunchExp", ¶ms, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -6387,6 +6420,10 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_tracing_layer::urCommandBufferUpdateKernelLaunchExp; + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 72e225028c..9b137051bc 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -6747,12 +6747,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { auto pfnCreateExp = context.urDdiTable.CommandBufferExp.pfnCreateExp; @@ -6784,7 +6784,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnRetainExp = context.urDdiTable.CommandBufferExp.pfnRetainExp; @@ -6811,7 +6811,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnReleaseExp = context.urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -6838,7 +6838,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { auto pfnFinalizeExp = context.urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -6861,9 +6861,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6874,8 +6874,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { auto pfnAppendKernelLaunchExp = context.urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -6916,8 +6918,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_result_t result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint); + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); return result; } @@ -6926,16 +6928,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendUSMMemcpyExp = context.urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7050,7 +7052,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7060,8 +7062,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -7103,18 +7105,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -7156,17 +7158,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -7208,7 +7210,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -7225,8 +7227,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferCopyRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyRectExp; @@ -7269,31 +7271,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferWriteRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteRectExp; @@ -7336,29 +7338,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { auto pfnAppendMemBufferReadRectExp = context.urDdiTable.CommandBufferExp.pfnAppendMemBufferReadRectExp; @@ -7565,15 +7567,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7616,6 +7617,37 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + auto pfnUpdateKernelLaunchExp = + context.urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hCommand) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pUpdateKernelLaunch) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + } + + ur_result_t result = + pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -8258,6 +8290,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; + dditable.pfnUpdateKernelLaunchExp = pDdiTable->pfnUpdateKernelLaunchExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_validation_layer::urCommandBufferUpdateKernelLaunchExp; + return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 6d3dda30f0..fadffc6148 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -32,6 +32,7 @@ ur_exp_image_mem_factory_t ur_exp_image_mem_factory; ur_exp_interop_mem_factory_t ur_exp_interop_mem_factory; ur_exp_interop_semaphore_factory_t ur_exp_interop_semaphore_factory; ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; +ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urAdapterGet @@ -6085,12 +6086,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferCreateExp __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6131,7 +6132,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( /// @brief Intercept function for urCommandBufferRetainExp __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6159,7 +6160,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// @brief Intercept function for urCommandBufferReleaseExp __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6187,7 +6188,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// @brief Intercept function for urCommandBufferFinalizeExp __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6215,9 +6216,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// @brief Intercept function for urCommandBufferAppendKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6228,8 +6229,10 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6252,10 +6255,26 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( hKernel = reinterpret_cast(hKernel)->handle; // forward to device-platform - result = pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + result = pfnAppendKernelLaunchExp( + hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + phCommand); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + if (nullptr != phCommand) { + *phCommand = + reinterpret_cast( + ur_exp_command_buffer_command_factory.getInstance( + *phCommand, dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } return result; } @@ -6264,16 +6283,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// @brief Intercept function for urCommandBufferAppendUSMMemcpyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6346,7 +6365,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6356,8 +6375,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6394,18 +6413,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6439,17 +6458,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6483,7 +6502,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// @brief Intercept function for urCommandBufferAppendMemBufferCopyRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6500,8 +6519,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6539,31 +6558,31 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferWriteRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6598,29 +6617,29 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// @brief Intercept function for urCommandBufferAppendMemBufferReadRectExp __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; @@ -6781,15 +6800,14 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -6842,6 +6860,59 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferUpdateKernelLaunchExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommand) + ->dditable; + auto pfnUpdateKernelLaunchExp = + dditable->ur.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hCommand = + reinterpret_cast(hCommand) + ->handle; + + // Deal with any struct parameters that have handle members we need to convert. + auto pUpdateKernelLaunchLocal = *pUpdateKernelLaunch; + + std::vector + pUpdateKernelLaunchpArgMemobjList; + for (uint32_t i = 0; i < pUpdateKernelLaunch->numMemobjArgs; i++) { + ur_exp_command_buffer_update_memobj_arg_desc_t NewRangeStruct = + pUpdateKernelLaunchLocal.pArgMemobjList[i]; + if (NewRangeStruct.hArgValue) { + NewRangeStruct.hArgValue = + reinterpret_cast(NewRangeStruct.hArgValue) + ->handle; + } + + pUpdateKernelLaunchpArgMemobjList.push_back(NewRangeStruct); + } + pUpdateKernelLaunchLocal.pArgMemobjList = + pUpdateKernelLaunchpArgMemobjList.data(); + + // Now that we've converted all the members update the param pointers + pUpdateKernelLaunch = &pUpdateKernelLaunchLocal; + + // forward to device-platform + result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -7457,6 +7528,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_loader::urCommandBufferAppendUSMAdviseExp; pDdiTable->pfnEnqueueExp = ur_loader::urCommandBufferEnqueueExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur_loader::urCommandBufferUpdateKernelLaunchExp; } else { // return pointers directly to platform's DDIs *pDdiTable = ur_loader::context->platforms.front() diff --git a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp index 4edbabbd8b..d98b99a655 100644 --- a/source/loader/ur_ldrddi.hpp +++ b/source/loader/ur_ldrddi.hpp @@ -92,6 +92,12 @@ using ur_exp_command_buffer_factory_t = singleton_factory_t; +using ur_exp_command_buffer_command_object_t = + object_t; +using ur_exp_command_buffer_command_factory_t = + singleton_factory_t; + } // namespace ur_loader #endif /* UR_LOADER_LDRDDI_H */ diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 0a69fcd1e2..883e084fec 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7013,7 +7013,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7030,12 +7030,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) try { auto pfnCreateExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnCreateExp; @@ -7063,7 +7063,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnRetainExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnRetainExp; @@ -7092,7 +7092,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnReleaseExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnReleaseExp; @@ -7121,7 +7121,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) try { auto pfnFinalizeExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnFinalizeExp; @@ -7135,7 +7135,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7162,9 +7162,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -7175,8 +7175,10 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) try { auto pfnAppendKernelLaunchExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendKernelLaunchExp; @@ -7187,13 +7189,13 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return pfnAppendKernelLaunchExp(hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint); + pSyncPointWaitList, pSyncPoint, phCommand); } catch (...) { return exceptionToResult(std::current_exception()); } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7218,16 +7220,16 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendUSMMemcpyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendUSMMemcpyExp; @@ -7243,7 +7245,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7298,7 +7300,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7319,7 +7321,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -7329,8 +7331,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferCopyExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferCopyExp; @@ -7346,7 +7348,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7368,18 +7370,18 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferWriteExp; @@ -7395,7 +7397,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7417,17 +7419,17 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadExp = ur_lib::context->urDdiTable.CommandBufferExp.pfnAppendMemBufferReadExp; @@ -7443,7 +7445,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7464,7 +7466,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -7481,8 +7483,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferCopyRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7500,7 +7502,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7522,31 +7524,31 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferWriteRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7564,7 +7566,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7586,29 +7588,29 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { auto pfnAppendMemBufferReadRectExp = ur_lib::context->urDdiTable.CommandBufferExp @@ -7626,7 +7628,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -7678,7 +7680,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -7734,7 +7736,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -7811,15 +7813,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -7836,6 +7837,49 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. + ) try { + auto pfnUpdateKernelLaunchExp = + ur_lib::context->urDdiTable.CommandBufferExp.pfnUpdateKernelLaunchExp; + if (nullptr == pfnUpdateKernelLaunchExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 2bcc229f29..609ff670bd 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -5949,7 +5949,7 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// @brief Create a Command-Buffer object /// /// @details -/// - Create a command-buffer object +/// - Create a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -5966,12 +5966,12 @@ ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ///< [in] handle of the context object - ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_context_handle_t hContext, ///< [in] Handle of the context object. + ur_device_handle_t hDevice, ///< [in] Handle of the device object. const ur_exp_command_buffer_desc_t - *pCommandBufferDesc, ///< [in][optional] CommandBuffer descriptor + *pCommandBufferDesc, ///< [in][optional] command-buffer descriptor. ur_exp_command_buffer_handle_t - *phCommandBuffer ///< [out] pointer to Command-Buffer handle + *phCommandBuffer ///< [out] Pointer to command-Buffer handle. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -5992,7 +5992,7 @@ ur_result_t UR_APICALL urCommandBufferCreateExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferRetainExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6014,7 +6014,7 @@ ur_result_t UR_APICALL urCommandBufferRetainExp( /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY ur_result_t UR_APICALL urCommandBufferReleaseExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; @@ -6036,14 +6036,14 @@ ur_result_t UR_APICALL urCommandBufferReleaseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferFinalizeExp( ur_exp_command_buffer_handle_t - hCommandBuffer ///< [in] handle of the command-buffer object + hCommandBuffer ///< [in] Handle of the command-buffer object. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a kernel execution command to a command-buffer object +/// @brief Append a kernel execution command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6070,9 +6070,9 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object - ur_kernel_handle_t hKernel, ///< [in] kernel to append - uint32_t workDim, ///< [in] dimension of the kernel execution + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_kernel_handle_t hKernel, ///< [in] Kernel to append. + uint32_t workDim, ///< [in] Dimension of the kernel execution. const size_t *pGlobalWorkOffset, ///< [in] Offset to use when executing kernel. const size_t * @@ -6083,15 +6083,17 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint, ///< [out][optional] Sync point associated with this command. + ur_exp_command_buffer_command_handle_t + *phCommand ///< [out][optional] Handle to this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM memcpy command to a command-buffer object +/// @brief Append a USM memcpy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6116,23 +6118,23 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. void *pDst, ///< [in] Location the data will be copied to. const void *pSrc, ///< [in] The data to be copied. - size_t size, ///< [in] The number of bytes to copy + size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM fill command to a command-buffer object +/// @brief Append a USM fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6178,7 +6180,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory copy command to a command-buffer object +/// @brief Append a memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6199,7 +6201,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. size_t srcOffset, ///< [in] Offset into the source memory. @@ -6209,15 +6211,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory write command to a command-buffer object +/// @brief Append a memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6239,25 +6241,25 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. const void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory read command to a command-buffer object +/// @brief Append a memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6279,24 +6281,24 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. - size_t offset, ///< [in] offset in bytes in the buffer object. - size_t size, ///< [in] size in bytes of data being written. - void *pDst, ///< [in] pointer to host memory where data is to be written to. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. + size_t offset, ///< [in] Offset in bytes in the buffer object. + size_t size, ///< [in] Size in bytes of data being written. + void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory copy command to a command-buffer object +/// @brief Append a rectangular memory copy command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6317,7 +6319,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_mem_handle_t hSrcMem, ///< [in] The data to be copied. ur_mem_handle_t hDstMem, ///< [in] The location the data will be copied to. ur_rect_offset_t @@ -6334,15 +6336,15 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory write command to a command-buffer object +/// @brief Append a rectangular memory write command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6364,38 +6366,38 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being ///< written. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pSrc. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pSrc. void * - pSrc, ///< [in] pointer to host memory where data is to be written from. + pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a rectangular memory read command to a command-buffer object +/// @brief Append a rectangular memory read command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6417,36 +6419,36 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. - ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. + ur_mem_handle_t hBuffer, ///< [in] Handle of the buffer object. ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer. ur_rect_offset_t hostOffset, ///< [in] 3D offset in the host region. ur_rect_region_t region, ///< [in] 3D rectangular region descriptor: width, height, depth. size_t - bufferRowPitch, ///< [in] length of each row in bytes in the buffer object. + bufferRowPitch, ///< [in] Length of each row in bytes in the buffer object. size_t - bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read. + bufferSlicePitch, ///< [in] Length of each 2D slice in bytes in the buffer object being read. size_t - hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed to + hostRowPitch, ///< [in] Length of each row in bytes in the host memory region pointed to ///< by pDst. size_t - hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + hostSlicePitch, ///< [in] Length of each 2D slice in bytes in the host memory region ///< pointed to by pDst. - void *pDst, ///< [in] pointer to host memory where data is to be read into. + void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. - ur_exp_command_buffer_sync_point_t - *pSyncPoint ///< [out][optional] sync point associated with this command + ur_exp_command_buffer_sync_point_t * + pSyncPoint ///< [out][optional] Sync point associated with this command. ) { ur_result_t result = UR_RESULT_SUCCESS; return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a memory fill command to a command-buffer object +/// @brief Append a memory fill command to a command-buffer object. /// /// @returns /// - ::UR_RESULT_SUCCESS @@ -6489,7 +6491,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Prefetch command to a command-buffer object +/// @brief Append a USM Prefetch command to a command-buffer object. /// /// @details /// - Prefetching may not be supported for all devices or allocation types. @@ -6536,7 +6538,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( } /////////////////////////////////////////////////////////////////////////////// -/// @brief Append a USM Advise command to a command-buffer object +/// @brief Append a USM Advise command to a command-buffer object. /// /// @details /// - Not all memory advice hints may be supported for all devices or @@ -6604,15 +6606,14 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t - hCommandBuffer, ///< [in] handle of the command-buffer object. + hCommandBuffer, ///< [in] Handle of the command-buffer object. ur_queue_handle_t - hQueue, ///< [in] the queue to submit this command-buffer for execution. - uint32_t numEventsInWaitList, ///< [in] size of the event wait list + hQueue, ///< [in] The queue to submit this command-buffer for execution. + uint32_t numEventsInWaitList, ///< [in] Size of the event wait list. const ur_event_handle_t * phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of ///< events that must be complete before the command-buffer execution. - ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait - ///< events. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. ur_event_handle_t * phEvent ///< [out][optional] return an event object that identifies this particular ///< command-buffer execution instance. @@ -6621,6 +6622,42 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Update a kernel launch command. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommand` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pUpdateKernelLaunch` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If update functionality is not supported by the device. +/// - ::UR_RESULT_ERROR_INVALID_OPERATION +/// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX +/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION +/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t + hCommand, ///< [in] Handle of the command-buffer kernel command to update. + const ur_exp_command_buffer_update_kernel_launch_desc_t * + pUpdateKernelLaunch ///< [in] Struct defining how the kernel command is to be updated. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index df80c02681..05e522c38b 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -115,6 +115,7 @@ if(UR_DPCXX) add_subdirectory(kernel) add_subdirectory(program) add_subdirectory(enqueue) + add_subdirectory(exp_command_buffer) else() message(WARNING "UR_DPCXX is not defined, the following conformance test executables \ diff --git a/test/conformance/device_code/CMakeLists.txt b/test/conformance/device_code/CMakeLists.txt index 9d19a34b93..e013d7bcb2 100644 --- a/test/conformance/device_code/CMakeLists.txt +++ b/test/conformance/device_code/CMakeLists.txt @@ -53,6 +53,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp) add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp) +add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp) set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h) add_custom_command(OUTPUT ${KERNEL_HEADER} diff --git a/test/conformance/device_code/indexers_usm.cpp b/test/conformance/device_code/indexers_usm.cpp new file mode 100644 index 0000000000..ee64fb39e0 --- /dev/null +++ b/test/conformance/device_code/indexers_usm.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + const cl::sycl::range<3> global_range(8, 8, 8); + const cl::sycl::range<3> local_range(2, 2, 2); + const cl::sycl::id<3> global_offset(4, 4, 4); + const cl::sycl::nd_range<3> nd_range(global_range, local_range, + global_offset); + + cl::sycl::queue sycl_queue; + const size_t elements_per_work_item = 6; + int *ptr = cl::sycl::malloc_shared(global_range[0] * global_range[1] * + global_range[2] * + elements_per_work_item, + sycl_queue); + + sycl_queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for( + nd_range, [ptr](cl::sycl::nd_item<3> index) { + int *wi_ptr = + ptr + index.get_global_linear_id() * elements_per_work_item; + + wi_ptr[0] = index.get_global_id(0); + wi_ptr[1] = index.get_global_id(1); + wi_ptr[2] = index.get_global_id(2); + + wi_ptr[3] = index.get_local_id(0); + wi_ptr[4] = index.get_local_id(1); + wi_ptr[5] = index.get_local_id(2); + }); + }); + return 0; +} diff --git a/test/conformance/device_code/saxpy.cpp b/test/conformance/device_code/saxpy.cpp new file mode 100644 index 0000000000..83ae11cb0a --- /dev/null +++ b/test/conformance/device_code/saxpy.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + std::vector X(array_size, 1); + std::vector Y(array_size, 2); + std::vector Z(array_size, 0); + uint32_t A = 42; + auto x_buff = + cl::sycl::buffer(X.data(), cl::sycl::range<1>(array_size)); + auto y_buff = + cl::sycl::buffer(Y.data(), cl::sycl::range<1>(array_size)); + auto z_buff = + cl::sycl::buffer(Z.data(), cl::sycl::range<1>(array_size)); + + cl::sycl::queue sycl_queue; + sycl_queue.submit([&](cl::sycl::handler &cgh) { + auto x_acc = x_buff.get_access(cgh); + auto y_acc = y_buff.get_access(cgh); + auto z_acc = z_buff.get_access(cgh); + cgh.parallel_for(cl::sycl::range<1>{array_size}, + [=](cl::sycl::item<1> itemId) { + auto i = itemId.get_id(0); + z_acc[i] = A * x_acc[i] + y_acc[i]; + }); + }); + return 0; +} diff --git a/test/conformance/device_code/saxpy_usm.cpp b/test/conformance/device_code/saxpy_usm.cpp new file mode 100644 index 0000000000..6d93fd2f8b --- /dev/null +++ b/test/conformance/device_code/saxpy_usm.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +int main() { + size_t array_size = 16; + + cl::sycl::queue sycl_queue; + uint32_t *X = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Y = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t *Z = cl::sycl::malloc_shared(array_size, sycl_queue); + uint32_t A = 42; + + sycl_queue.submit([&](cl::sycl::handler &cgh) { + cgh.parallel_for(cl::sycl::range<1>{array_size}, + [=](cl::sycl::item<1> itemId) { + auto i = itemId.get_id(0); + Z[i] = A * X[i] + Y[i]; + }); + }); + return 0; +} diff --git a/test/conformance/exp_command_buffer/CMakeLists.txt b/test/conformance/exp_command_buffer/CMakeLists.txt new file mode 100644 index 0000000000..1a948803ee --- /dev/null +++ b/test/conformance/exp_command_buffer/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright (C) 2023 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +add_conformance_test_with_kernels_environment(exp_command_buffer + buffer_fill_kernel_update.cpp + usm_fill_kernel_update.cpp + buffer_saxpy_kernel_update.cpp + usm_saxpy_kernel_update.cpp + ndrange_update.cpp +) diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp new file mode 100644 index 0000000000..196552b944 --- /dev/null +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -0,0 +1,397 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +// Test that updating a command-buffer with a single kernel command +// taking USM arguments works correctly. +struct BufferFillCommandTest : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "fill"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + // First argument is buffer to fill (will also be hidden accessor arg) + AddBuffer1DArg(sizeof(val) * global_size, &buffer); + // Second argument is scalar to fill with. + AddPodArg(val); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void TearDown() override { + if (new_buffer) { + EXPECT_SUCCESS(urMemRelease(new_buffer)); + } + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t buffer_size = sizeof(val) * global_size; + ur_mem_handle_t buffer = nullptr; + ur_mem_handle_t new_buffer = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(BufferFillCommandTest); + +// Update kernel arguments to fill with a new scalar value to a new output +// buffer. +TEST_P(BufferFillCommandTest, UpdateParameters) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, buffer_size, val); + + // Create a new buffer to update kernel output parameter to + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Set argument index zero as new buffer + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + // Set argument index 2 as new value to fill (index 1 is buffer accessor) + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + &new_output_desc, // pArgMemobjList + nullptr, // pArgPointerList + &new_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + ValidateBuffer(new_buffer, buffer_size, new_val); +} + +// Test updating the global size so that the fill outputs to a larger buffer +TEST_P(BufferFillCommandTest, UpdateGlobalSize) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + size_t new_global_size = 64; + const size_t buffer_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 0, // workDim + &new_output_desc, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + &new_global_size, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(new_buffer, buffer_size, val); +} + +// Test updating the input & output kernel arguments and global +// size, by calling update individually for each of these configurations. +TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + size_t new_global_size = 64; + const size_t buffer_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + buffer_size, nullptr, &new_buffer)); + char zero = 0; + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, new_buffer, &zero, + sizeof(zero), 0, buffer_size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + new_buffer, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t output_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 1, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 0, // workDim + &new_output_desc, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &output_update_desc)); + + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t input_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + &new_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &input_update_desc)); + + ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + &new_global_size, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + command_handle, &global_size_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(new_buffer, buffer_size, new_val); +} + +// Test calling update twice on the same command-handle updating the +// input value, and verifying that it's the second call which persists. +TEST_P(BufferFillCommandTest, OverrideUpdate) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + uint32_t first_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t first_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(first_val), // argSize + nullptr, // pProperties + &first_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t first_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + &first_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &first_update_desc)); + + uint32_t second_val = -99; + ur_exp_command_buffer_update_value_arg_desc_t second_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(second_val), // argSize + nullptr, // pProperties + &second_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + &second_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &second_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(buffer, sizeof(val) * global_size, second_val); +} + +// Test calling update with multiple ur_exp_command_buffer_update_value_arg_desc_t +// instances updating the same argument, and checking that the last one in the +// list persists. +TEST_P(BufferFillCommandTest, OverrideArgList) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ValidateBuffer(buffer, sizeof(val) * global_size, val); + + ur_exp_command_buffer_update_value_arg_desc_t input_descs[2]; + uint32_t first_val = 33; + input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(first_val), // argSize + nullptr, // pProperties + &first_val, // hArgValue + }; + + uint32_t second_val = -99; + input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + sizeof(second_val), // argSize + nullptr, // pProperties + &second_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 2, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + input_descs, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, + &second_update_desc)); + + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ValidateBuffer(buffer, sizeof(val) * global_size, second_val); +} diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp new file mode 100644 index 0000000000..09049337d6 --- /dev/null +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -0,0 +1,172 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include + +// Test that updating a command-buffer with a single kernel command +// taking buffer & scalar arguments works correctly. +struct BufferSaxpyKernelTests : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "saxpy"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + const size_t allocation_size = sizeof(uint32_t) * global_size; + for (auto &buffer : buffers) { + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + allocation_size, nullptr, + &buffer)); + ASSERT_NE(buffer, nullptr); + + std::vector init(allocation_size); + uur::generateMemFillPattern(init); + + ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, true, 0, + allocation_size, init.data(), + 0, nullptr, nullptr)); + } + + // Index 0 is output buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0])); + // Index 1 is output accessor + struct { + size_t offsets[1] = {0}; + } accessor; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr, + &accessor)); + + // Index 2 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 2, sizeof(A), nullptr, &A)); + // Index 3 is X buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1])); + + // Index 4 is X buffer accessor + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 4, sizeof(accessor), nullptr, + &accessor)); + // Index 5 is Y buffer + ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2])); + + // Index 6 is X buffer accessor + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 6, sizeof(accessor), nullptr, + &accessor)); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(ur_mem_handle_t output, ur_mem_handle_t X, ur_mem_handle_t Y, + uint32_t A, size_t length) { + + std::vector output_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, output, true, 0, length, + output_data.data(), 0, nullptr, + nullptr)); + + std::vector X_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, X, true, 0, length, X_data.data(), 0, nullptr, nullptr)); + + std::vector Y_data(length, 0); + ASSERT_SUCCESS(urEnqueueMemBufferRead( + queue, Y, true, 0, length, Y_data.data(), 0, nullptr, nullptr)); + + for (size_t i = 0; i < length; i++) { + uint32_t result = A * X_data[i] + Y_data[i]; + ASSERT_EQ(result, output_data[i]); + } + } + + void TearDown() override { + for (auto &buffer : buffers) { + if (buffer) { + EXPECT_SUCCESS(urMemRelease(buffer)); + } + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + std::array buffers = {nullptr, nullptr, nullptr, + nullptr}; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(BufferSaxpyKernelTests); + +TEST_P(BufferSaxpyKernelTests, UpdateParameters) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(buffers[0], buffers[1], buffers[2], A, global_size); + + ur_exp_command_buffer_update_memobj_arg_desc_t new_input_descs[2]; + // New X at index 3 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + buffers[3], // hArgValue + }; + + // New Y at index 5 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC, // stype + nullptr, // pNext + 5, // argIndex + nullptr, // pProperties + buffers[4], // hArgValue + }; + + // A at index 2 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext, + 2, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 2, // numMemobjArgs + 0, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + new_input_descs, // pArgMemobjList + nullptr, // pArgPointerList + &new_A_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate(buffers[0], buffers[3], buffers[4], new_A, global_size); +} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match new file mode 100644 index 0000000000..6ebcfb6d7d --- /dev/null +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -0,0 +1,13 @@ +{{OPT}}BufferFillCommandTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.UpdateGlobalSize/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.OverrideUpdate/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferFillCommandTest.OverrideArgList/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMFillCommandTest.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMFillCommandTest.UpdateExecInfo/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/AMD_HIP_BACKEND{{.*}} +{{OPT}}BufferSaxpyKernelTests.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}USMSaxpyKernelTests.UpdateParameters/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update3D/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update2D/AMD_HIP_BACKEND{{.*}} +{{OPT}}NDRangeUpdateTests.Update1D/AMD_HIP_BACKEND{{.*}} diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_opencl.match new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp new file mode 100644 index 0000000000..5ad98fc9d8 --- /dev/null +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -0,0 +1,249 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +// Test that updating a command-buffer with a single kernel command +// in a way that changes the NDRange configuration. +struct NDRangeUpdateTests : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "indexers_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Allocate a USM pointer for use as kernel output at index 0 + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr)); + + // Add a 3 dimension kernel command to command-buffer and close + // command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, + global_offset.data(), global_size.data(), local_size.data(), 0, + nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + // For each work-item the kernel prints the global id and local id in each + // of the 3 dimensions to an offset in the output based on global linear + // id. + void Validate(std::array global_size, + std::array local_size, + std::array global_offset) { + // DPC++ swaps the X & Z dimension for 3 Dimensional kernels + // between those set by user and SPIR-V builtins. + // See `ReverseRangeDimensionsForKernel()` in commands.cpp + + std::swap(global_size[0], global_size[2]); + std::swap(local_size[0], local_size[2]); + std::swap(global_offset[0], global_offset[2]); + + // Verify global ID and local ID of each work item + for (size_t x = 0; x < global_size[0]; x++) { + for (size_t y = 0; y < global_size[1]; y++) { + for (size_t z = 0; z < global_size[2]; z++) { + const size_t global_linear_id = + z + (y * global_size[2]) + + (x * global_size[1] * global_size[0]); + int *wi_ptr = (int *)shared_ptr + + (elements_per_id * global_linear_id); + + const int global_id_x = wi_ptr[0]; + const int global_id_y = wi_ptr[1]; + const int global_id_z = wi_ptr[2]; + +#if 0 + std::cout << "("<< x <<"," << y << "," << z << ") --> "; + std::cout << global_linear_id << " = "; + std::cout << "("<< global_id_x <<"," << global_id_y << "," + << global_id_z << ")\n"; +#endif + + EXPECT_EQ(global_id_x, x + global_offset[0]); + EXPECT_EQ(global_id_y, y + global_offset[1]); + EXPECT_EQ(global_id_z, z + global_offset[2]); + + const int local_id_x = wi_ptr[3]; + const int local_id_y = wi_ptr[4]; + const int local_id_z = wi_ptr[5]; + + EXPECT_EQ(local_id_x, x % local_size[0]); + EXPECT_EQ(local_id_y, y % local_size[1]); + EXPECT_EQ(local_id_z, z % local_size[2]); + } + } + } + } + + void TearDown() override { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr size_t elements_per_id = 6; + static constexpr size_t n_dimensions = 3; + static constexpr std::array global_size = {8, 8, 8}; + static constexpr std::array local_size = {1, 2, 2}; + static constexpr std::array global_offset = {0, 4, 4}; + static constexpr size_t allocation_size = sizeof(int) * elements_per_id * + global_size[0] * global_size[1] * + global_size[2]; + void *shared_ptr = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(NDRangeUpdateTests); + +// Keep the kernel work dimensions as 3, and update local size and global +// offset. +TEST_P(NDRangeUpdateTests, Update3D) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(global_size, local_size, global_offset); + + // Set local size and global offset to update to + std::array new_local_size = {4, 2, 2}; + std::array new_global_offset = {3, 2, 1}; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 3, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + new_global_offset.data(), // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + new_local_size.data(), // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate(global_size, new_local_size, new_global_offset); +} + +// Update the kernel work dimensions to 2, and update global size, local size, +// and global offset to new values. +TEST_P(NDRangeUpdateTests, Update2D) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(global_size, local_size, global_offset); + + // Set ND-Range configuration to update to + std::array new_global_size = {6, 6, 1}; + std::array new_local_size = {3, 3, 1}; + std::array new_global_offset = {3, 3, 0}; + + // Set dimensions as 2 + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 2, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + new_global_offset.data(), // pGlobalWorkOffset + new_global_size.data(), // pGlobalWorkSize + new_local_size.data(), // pLocalWorkSize + }; + + // Reset output to remove old values which will no longer have a + // work-item to overwrite them + std::memset(shared_ptr, 0, allocation_size); + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate(new_global_size, new_local_size, new_global_offset); +} + +// Update the kernel work dimensions to 1, and check that previously +// set global size, local size, and global offset update accordingly. +TEST_P(NDRangeUpdateTests, Update1D) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate(global_size, local_size, global_offset); + + // Set dimensions to 1 + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 0, // numExecInfos + 1, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Reset output to remove old values which will no longer have a + // work-item to overwrite them + std::memset(shared_ptr, 0, allocation_size); + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + std::array new_global_size = {global_size[0], 1, 1}; + std::array new_local_size = {local_size[0], 1, 1}; + std::array new_global_offset = {global_offset[0], 0, 0}; + Validate(new_global_size, new_local_size, new_global_offset); +} diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp new file mode 100644 index 0000000000..8dbfba0418 --- /dev/null +++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp @@ -0,0 +1,370 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +// Test that updating a command-buffer with a single kernel command +// taking USM arguments works correctly. +struct USMFillCommandTest : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "fill_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Allocate USM pointer to fill + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + // Index 0 is output + ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptr)); + // Index 1 is input scalar + ASSERT_SUCCESS( + urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val)); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(uint32_t *pointer, size_t length, uint32_t val) { + for (size_t i = 0; i < length; i++) { + ASSERT_EQ(pointer[i], val); + } + } + + void TearDown() override { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + if (new_shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(val) * global_size; + void *shared_ptr = nullptr; + void *new_shared_ptr = nullptr; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMFillCommandTest); + +// Test using a different global size to fill and larger USM output buffer +TEST_P(USMFillCommandTest, UpdateParameters) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate((uint32_t *)shared_ptr, global_size, val); + + // Allocate a new USM pointer of larger size + size_t new_global_size = 64; + const size_t new_allocation_size = sizeof(val) * new_global_size; + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + new_allocation_size, &new_shared_ptr)); + ASSERT_NE(new_shared_ptr, nullptr); + std::memset(new_shared_ptr, 0, new_allocation_size); + + // Set new USM pointer as kernel output at index 0 + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &new_shared_ptr, // pArgValue + }; + + // Set new value to use for fill at kernel index 1 + uint32_t new_val = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_val), // argSize + nullptr, // pProperties + &new_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 1, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + &new_output_desc, // pArgPointerList + &new_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + &new_global_size, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + Validate((uint32_t *)new_shared_ptr, new_global_size, new_val); +} + +// Test updating the kernel execution info +TEST_P(USMFillCommandTest, UpdateExecInfo) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate((uint32_t *)shared_ptr, global_size, val); + + ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3]; + + // Update direct access flag + bool indirect_access = false; + new_exec_info_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype + nullptr, // pNext + UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, // propName + sizeof(indirect_access), // propSize + nullptr, // pProperties + &indirect_access, // pPropValue + }; + + // Update cache config + ur_kernel_cache_config_t cache_config = UR_KERNEL_CACHE_CONFIG_DEFAULT; + new_exec_info_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype + nullptr, // pNext + UR_KERNEL_EXEC_INFO_CACHE_CONFIG, // propName + sizeof(cache_config), // propSize + nullptr, // pProperties + &cache_config, // pPropValue + }; + + // Create a new USM allocation to set indirect access for + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &new_shared_ptr)); + ASSERT_NE(new_shared_ptr, nullptr); + void *pointers = {new_shared_ptr}; + new_exec_info_descs[2] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype + nullptr, // pNext + UR_KERNEL_EXEC_INFO_USM_PTRS, // propName + sizeof(pointers), // propSize + nullptr, // pProperties + &pointers, // pPropValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 0, // numPointerArgs + 0, // numValueArgs + 3, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + nullptr, // pArgPointerList + nullptr, // pArgValueList + new_exec_info_descs, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify results are correct, although exec info modifications should + // have no effect on output + Validate((uint32_t *)shared_ptr, global_size, val); +} + +// Test updating a command-buffer with multiple USM fill kernel commands +struct USMMultipleFillCommandTest : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "fill_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + // Create a single USM allocation which will be used by all kernels + // by accessing at pointer offsets + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + std::memset(shared_ptr, 0, allocation_size); + + // Append multiple kernel commands to command-buffer + for (size_t k = 0; k < num_kernels; k++) { + // Calculate offset into output allocation, and set as + // kernel output. + void *offset_ptr = (uint32_t *)shared_ptr + (k * elements); + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, &offset_ptr)); + + // Each kernel has a unique fill value + uint32_t fill_val = val + k; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(fill_val), + nullptr, &fill_val)); + + // Append kernel and store returned handle + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &elements, &local_size, 0, nullptr, nullptr, + &command_handles[k])); + ASSERT_NE(command_handles[k], nullptr); + } + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(uint32_t *pointer, size_t length, uint32_t val) { + for (size_t i = 0; i < length; i++) { + ASSERT_EQ(pointer[i], val); + } + } + + void TearDown() override { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + + if (new_shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr uint32_t val = 42; + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 64; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr size_t allocation_size = sizeof(val) * global_size; + static constexpr size_t num_kernels = 8; + static constexpr size_t elements = global_size / num_kernels; + + void *shared_ptr = nullptr; + void *new_shared_ptr = nullptr; + std::array + command_handles; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultipleFillCommandTest); + +// Test updating all the kernels commands in the command-buffer +TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptr; + for (size_t i = 0; i < global_size; i++) { + const uint32_t expected = val + (i / elements); + ASSERT_EQ(expected, output[i]); + } + + // Create a new USM allocation to update kernel outputs to + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &new_shared_ptr)); + ASSERT_NE(new_shared_ptr, nullptr); + std::memset(new_shared_ptr, 0, allocation_size); + + // Update each kernel in the command-buffer. + uint32_t new_val = 33; + for (size_t k = 0; k < num_kernels; k++) { + // Update output pointer to an offset into new USM allocation + void *offset_ptr = (uint32_t *)new_shared_ptr + (k * elements); + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &offset_ptr, // pArgValue + }; + + // Update fill value + uint32_t new_fill_val = new_val + k; + ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(int), // argSize + nullptr, // pProperties + &new_fill_val, // hArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 1, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + &new_output_desc, // pArgPointerList + &new_input_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handles[k], + &update_desc)); + } + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *updated_output = (uint32_t *)new_shared_ptr; + for (size_t i = 0; i < global_size; i++) { + uint32_t expected = new_val + (i / elements); + ASSERT_EQ(expected, updated_output[i]) << i; + } +} diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp new file mode 100644 index 0000000000..d7f76cfc7f --- /dev/null +++ b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp @@ -0,0 +1,158 @@ +// Copyright (C) 2023 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +// Test that updating a command-buffer with a single kernel command +// taking USM & scalar arguments works correctly. +struct USMSaxpyKernelTests : uur::urExpUpdatableCommandBufferTests { + void SetUp() override { + program_name = "saxpy_usm"; + UUR_RETURN_ON_FATAL_FAILURE(urExpUpdatableCommandBufferTests::SetUp()); + + ur_device_usm_access_capability_flags_t shared_usm_flags; + ASSERT_SUCCESS( + uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); + if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { + GTEST_SKIP() << "Shared USM is not supported."; + } + + const size_t allocation_size = sizeof(uint32_t) * global_size; + for (auto &shared_ptr : shared_ptrs) { + ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, + allocation_size, &shared_ptr)); + ASSERT_NE(shared_ptr, nullptr); + + std::vector pattern(allocation_size); + uur::generateMemFillPattern(pattern); + std::memcpy(shared_ptr, pattern.data(), allocation_size); + } + + // Index 0 is output + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 0, nullptr, &shared_ptrs[0])); + // Index 1 is A + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); + // Index 2 is X + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 2, nullptr, &shared_ptrs[1])); + // Index 3 is Y + ASSERT_SUCCESS( + urKernelSetArgPointer(kernel, 3, nullptr, &shared_ptrs[2])); + + // Append kernel command to command-buffer and close command-buffer + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, + &global_size, &local_size, 0, nullptr, nullptr, &command_handle)); + ASSERT_NE(command_handle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + } + + void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A, + size_t length) { + for (size_t i = 0; i < length; i++) { + uint32_t result = A * X[i] + Y[i]; + ASSERT_EQ(result, output[i]); + } + } + + void TearDown() override { + for (auto &shared_ptr : shared_ptrs) { + if (shared_ptr) { + EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); + } + } + + UUR_RETURN_ON_FATAL_FAILURE( + urExpUpdatableCommandBufferTests::TearDown()); + } + + static constexpr size_t local_size = 4; + static constexpr size_t global_size = 32; + static constexpr size_t global_offset = 0; + static constexpr size_t n_dimensions = 1; + static constexpr uint32_t A = 42; + std::array shared_ptrs = {nullptr, nullptr, nullptr, nullptr}; + ur_exp_command_buffer_command_handle_t command_handle = nullptr; +}; + +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMSaxpyKernelTests); + +TEST_P(USMSaxpyKernelTests, UpdateParameters) { + // Run command-buffer prior to update an verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size); + + // Update inputs + ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; + + // New X at index 2 + new_input_descs[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + nullptr, // pProperties + &shared_ptrs[3], // pArgValue + }; + + // New Y at index 3 + new_input_descs[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + &shared_ptrs[4], // pArgValue + }; + + // New A at index 1 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numMemobjArgs + 2, // numPointerArgs + 1, // numValueArgs + 0, // numExecInfos + 0, // workDim + nullptr, // pArgMemobjList + new_input_descs, // pArgPointerList + &new_A_desc, // pArgValueList + nullptr, // pArgExecInfoList + nullptr, // pGlobalWorkOffset + nullptr, // pGlobalWorkSize + nullptr, // pLocalWorkSize + }; + + // Update kernel and enqueue command-buffer again + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + uint32_t *new_X = (uint32_t *)shared_ptrs[3]; + uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; + Validate(new_output, new_X, new_Y, new_A, global_size); +} diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index 2ede84d135..fed57fcb38 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1276,6 +1276,77 @@ struct urGlobalVariableTest : uur::urKernelExecutionTest { GlobalVar global_var; }; +struct urExpCommandBufferTest : urKernelExecutionTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); + + size_t returned_size; + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, + nullptr, &returned_size)); + + std::unique_ptr returned_extensions(new char[returned_size]); + + ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, + returned_size, returned_extensions.get(), + nullptr)); + + std::string_view extensions_string(returned_extensions.get()); + bool command_buffer_support = + extensions_string.find(UR_COMMAND_BUFFER_EXTENSION_STRING_EXP) != + std::string::npos; + + if (!command_buffer_support) { + GTEST_SKIP() << "EXP command-buffer feature is not supported."; + } + + ASSERT_SUCCESS(urDeviceGetInfo( + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, + sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + + // Create a command-buffer + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, + &cmd_buf_handle)); + ASSERT_NE(cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::TearDown()); + } + + ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; + ur_bool_t updatable_command_buffer_support = false; +}; + +struct urExpUpdatableCommandBufferTests : urExpCommandBufferTest { + void SetUp() override { + UUR_RETURN_ON_FATAL_FAILURE(urExpCommandBufferTest ::SetUp()); + + if (!updatable_command_buffer_support) { + GTEST_SKIP() << "Updating EXP command-buffers is not supported."; + } + + // Create a command-buffer with update enabled. + ur_exp_command_buffer_desc_t desc{ + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true}; + + ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, + &updatable_cmd_buf_handle)); + ASSERT_NE(updatable_cmd_buf_handle, nullptr); + } + + void TearDown() override { + if (updatable_cmd_buf_handle) { + EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); + } + UUR_RETURN_ON_FATAL_FAILURE(urExpCommandBufferTest::TearDown()); + } + + ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; +}; + } // namespace uur #endif // UR_CONFORMANCE_INCLUDE_FIXTURES_H_INCLUDED diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index d9677c3eab..b959e565c4 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -323,6 +323,12 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_ESIMD_SUPPORT); std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP); + std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP); std::cout << prefix;