From 3f85f3058eab63fa9bd28b0b98194fa2c21ce7ce Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Mon, 19 Feb 2024 16:06:56 +0000 Subject: [PATCH] [EXP][Command-Buffer] Optimize L0 command buffer submission - Adds command buffer property to explicitly enable profiling - Add ability to enforce use of in-order command lists --- include/ur_api.h | 39 +- include/ur_print.hpp | 10 + scripts/core/EXP-COMMAND-BUFFER.rst | 11 +- scripts/core/exp-command-buffer.yml | 42 +- source/adapters/level_zero/command_buffer.cpp | 398 ++++++++++++------ source/adapters/level_zero/command_buffer.hpp | 9 +- source/adapters/level_zero/event.cpp | 8 +- source/adapters/level_zero/event.hpp | 1 + source/adapters/level_zero/queue.cpp | 5 +- source/adapters/null/ur_nullddi.cpp | 36 +- source/loader/layers/tracing/ur_trcddi.cpp | 36 +- source/loader/layers/validation/ur_valddi.cpp | 36 +- source/loader/ur_ldrddi.cpp | 36 +- source/loader/ur_libapi.cpp | 36 +- source/ur_api.cpp | 36 +- 15 files changed, 496 insertions(+), 243 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 7a2d76597e..063c398d8c 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -7979,6 +7979,9 @@ typedef struct ur_exp_command_buffer_desc_t { ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure ur_bool_t isUpdatable; ///< [in] Commands in a finalized command-buffer can be updated. + ur_bool_t isInOrder; ///< [in] Commands in a command-buffer may be executed in-order without + ///< explicit dependencies. + ur_bool_t enableProfiling; ///< [in] Command-buffer profiling is enabled. } ur_exp_command_buffer_desc_t; @@ -8183,7 +8186,8 @@ urCommandBufferAppendKernelLaunchExp( const size_t *pGlobalWorkSize, ///< [in] Global work size to use when executing kernel. const size_t *pLocalWorkSize, ///< [in][optional] Local work size to use when executing kernel. 
uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint, ///< [out][optional] Sync point associated with this command. ur_exp_command_buffer_command_handle_t *phCommand ///< [out][optional] Handle to this command. ); @@ -8219,7 +8223,8 @@ urCommandBufferAppendUSMMemcpyExp( const void *pSrc, ///< [in] The data to be copied. size_t size, ///< [in] The number of bytes to copy. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); @@ -8258,7 +8263,8 @@ urCommandBufferAppendUSMFillExp( size_t patternSize, ///< [in] size in bytes of the pattern. size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. 
ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. ); @@ -8291,7 +8297,8 @@ urCommandBufferAppendMemBufferCopyExp( size_t dstOffset, ///< [in] Offset into the destination memory size_t size, ///< [in] The number of bytes to be copied. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); @@ -8324,7 +8331,8 @@ urCommandBufferAppendMemBufferWriteExp( size_t size, ///< [in] Size in bytes of data being written. const void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); @@ -8357,7 +8365,8 @@ urCommandBufferAppendMemBufferReadExp( size_t size, ///< [in] Size in bytes of data being written. void *pDst, ///< [in] Pointer to host memory where data is to be written to. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
- const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); @@ -8394,7 +8403,8 @@ urCommandBufferAppendMemBufferCopyRectExp( size_t dstRowPitch, ///< [in] Row pitch of the destination memory. size_t dstSlicePitch, ///< [in] Slice pitch of the destination memory. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); @@ -8435,7 +8445,8 @@ urCommandBufferAppendMemBufferWriteRectExp( ///< pointed to by pSrc. void *pSrc, ///< [in] Pointer to host memory where data is to be written from. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); @@ -8475,7 +8486,8 @@ urCommandBufferAppendMemBufferReadRectExp( ///< pointed to by pDst. 
void *pDst, ///< [in] Pointer to host memory where data is to be read into. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] Sync point associated with this command. ); @@ -8511,7 +8523,8 @@ urCommandBufferAppendMemBufferFillExp( size_t offset, ///< [in] offset into the buffer. size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. ); @@ -8552,7 +8565,8 @@ urCommandBufferAppendUSMPrefetchExp( size_t size, ///< [in] size in bytes to be fetched. ur_usm_migration_flags_t flags, ///< [in] USM prefetch flags uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. 
ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. ); @@ -8593,7 +8607,8 @@ urCommandBufferAppendUSMAdviseExp( size_t size, ///< [in] size in bytes to be advised. ur_usm_advice_flags_t advice, ///< [in] USM memory advice uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t *pSyncPoint ///< [out][optional] sync point associated with this command. ); diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 63eeb17a0e..a5074c5da1 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -9496,6 +9496,16 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu os << (params.isUpdatable); + os << ", "; + os << ".isInOrder = "; + + os << (params.isInOrder); + + os << ", "; + os << ".enableProfiling = "; + + os << (params.enableProfiling); + os << "}"; return os; } diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index eb4656ba05..c23519cf67 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -77,9 +77,13 @@ Command-Buffer Creation Command-Buffers are tied to a specific ${x}_context_handle_t and ${x}_device_handle_t. ${x}CommandBufferCreateExp optionally takes a descriptor to provide additional properties for how the command-buffer should be -constructed. The only unique member defined in ${x}_exp_command_buffer_desc_t -is ``isUpdatable``, which should be set to ``true`` to support :ref:`updating +constructed. 
The members defined in ${x}_exp_command_buffer_desc_t are: +* ``isUpdatable``, which should be set to ``true`` to support :ref:`updating command-buffer commands`. +* ``isInOrder``, which should be set to ``true`` to enable commands enqueued to +a command-buffer to be executed in an in-order fashion where possible. +* ``enableProfiling``, which should be set to ``true`` to enable profiling of +the command-buffer. Command-buffers are reference counted and can be retained and released by calling ${x}CommandBufferRetainExp and ${x}CommandBufferReleaseExp respectively. @@ -123,7 +127,8 @@ Sync-Points A sync-point is a value which represents a command inside of a command-buffer which is returned from command-buffer append function calls. These can be optionally passed to these functions to define execution dependencies on other -commands within the command-buffer. +commands within the command-buffer. Sync-points passed to functions may be +ignored if the command-buffer was created in-order. Sync-points are unique and valid for use only within the command-buffer they were obtained from. diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 6eb02229ed..78a1b020ef 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -110,6 +110,12 @@ members: - type: $x_bool_t name: isUpdatable desc: "[in] Commands in a finalized command-buffer can be updated." + - type: $x_bool_t + name: isInOrder + desc: "[in] Commands in a command-buffer may be executed in-order without explicit dependencies." + - type: $x_bool_t + name: enableProfiling + desc: "[in] Command-buffer profiling is enabled." --- #-------------------------------------------------------------------------- type: struct desc: "Descriptor type for updating a kernel command memobj argument." @@ -306,7 +312,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." 
- type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." @@ -348,7 +355,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." @@ -390,7 +398,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] sync point associated with this command." @@ -438,7 +447,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." 
@@ -477,7 +487,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." @@ -516,7 +527,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] Sync point associated with this command." @@ -570,7 +582,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint desc: "[out][optional] Sync point associated with this command." @@ -624,7 +637,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." 
- type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint desc: "[out][optional] Sync point associated with this command." @@ -678,7 +692,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint desc: "[out][optional] Sync point associated with this command." @@ -720,7 +735,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: $x_exp_command_buffer_sync_point_t* name: pSyncPoint desc: "[out][optional] sync point associated with this command." @@ -761,7 +777,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] sync point associated with this command." @@ -804,7 +821,8 @@ params: desc: "[in] The number of sync points in the provided dependency list." - type: "const $x_exp_command_buffer_sync_point_t*" name: pSyncPointWaitList - desc: "[in][optional] A list of sync points that this command depends on." + desc: "[in][optional] A list of sync points that this command depends on. 
+ May be ignored if command-buffer is in-order." - type: "$x_exp_command_buffer_sync_point_t*" name: pSyncPoint desc: "[out][optional] sync point associated with this command." diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 46e2e33607..f5382aa026 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -15,17 +15,44 @@ https://github.com/intel/llvm/blob/sycl/sycl/doc/design/CommandGraph.md#level-zero */ +namespace { +/// Checks the version of the level-zero driver. +/// @param Context Execution context +/// @param VersionMajor Major version number to compare to. +/// @param VersionMinor Minor version number to compare to. +/// @param VersionBuild Build version number to compare to. +/// @return true if the driver version is higher than or equal to the compared +/// version, comparing (major, minor, build) lexicographically. +bool IsDriverVersionNewerOrSimilar(ur_context_handle_t Context, + uint32_t VersionMajor, uint32_t VersionMinor, + uint32_t VersionBuild) { + ZeStruct ZeDriverProperties; + ZE2UR_CALL(zeDriverGetProperties, + (Context->getPlatform()->ZeDriver, &ZeDriverProperties)); + uint32_t DriverVersion = ZeDriverProperties.driverVersion; + auto DriverVersionMajor = (DriverVersion & 0xFF000000) >> 24; + auto DriverVersionMinor = (DriverVersion & 0x00FF0000) >> 16; + auto DriverVersionBuild = DriverVersion & 0x0000FFFF; + + if (DriverVersionMajor != VersionMajor) return DriverVersionMajor > VersionMajor; + if (DriverVersionMinor != VersionMinor) return DriverVersionMinor > VersionMinor; + return DriverVersionBuild >= VersionBuild; +} +}; // namespace + ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t Context, ur_device_handle_t Device, ze_command_list_handle_t CommandList, ze_command_list_handle_t CommandListResetEvents, ZeStruct ZeDesc, - const ur_exp_command_buffer_desc_t *Desc) + const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList) : Context(Context), Device(Device), ZeCommandList(CommandList), 
ZeCommandListResetEvents(CommandListResetEvents), ZeCommandListDesc(ZeDesc), ZeFencesList(), QueueProperties(), SyncPoints(), NextSyncPoint(0), - IsUpdatable(Desc ? Desc->isUpdatable : false) { + IsUpdatable(Desc ? Desc->isUpdatable : false), + IsProfilingEnabled(Desc ? Desc->enableProfiling : false), + IsInOrderCmdList(IsInOrderCmdList) { urContextRetain(Context); urDeviceRetain(Device); } @@ -76,6 +103,35 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { for (auto &ZeFence : ZeFencesList) { ZE_CALL_NOCHECK(zeFenceDestroy, (ZeFence)); } + + auto ReleaseIndirectMem = [](ur_kernel_handle_t Kernel) { + if (IndirectAccessTrackingEnabled) { + // urKernelRelease is called by CleanupCompletedEvent(Event) as soon as + // kernel execution has finished. This is the place where we need to + // release memory allocations. If kernel is not in use (not submitted by + // some other thread) then release referenced memory allocations. As a + // result, memory can be deallocated and context can be removed from + // container in the platform. That's why we need to lock a mutex here. + ur_platform_handle_t Platform = Kernel->Program->Context->getPlatform(); + std::scoped_lock ContextsLock(Platform->ContextsMutex); + + if (--Kernel->SubmissionsCount == 0) { + // Kernel is not submitted for execution, release referenced memory + // allocations. 
+ for (auto &MemAlloc : Kernel->MemAllocs) { + // std::pair *, Hash + USMFreeHelper(MemAlloc->second.Context, MemAlloc->first, + MemAlloc->second.OwnNativeHandle); + } + Kernel->MemAllocs.clear(); + } + } + }; + + for (auto &AssociatedKernel : KernelsList) { + ReleaseIndirectMem(AssociatedKernel); + urKernelRelease(AssociatedKernel); + } } ur_exp_command_buffer_command_handle_t_:: @@ -227,14 +283,10 @@ static ur_result_t getEventsFromSyncPoints( if (!SyncPointWaitList || NumSyncPointsInWaitList == 0) return UR_RESULT_SUCCESS; - // Map of ur_exp_command_buffer_sync_point_t to ur_event_handle_t defining - // the event associated with each sync-point - auto SyncPoints = CommandBuffer->SyncPoints; - // For each sync-point add associated L0 event to the return list. for (size_t i = 0; i < NumSyncPointsInWaitList; i++) { - if (auto EventHandle = SyncPoints.find(SyncPointWaitList[i]); - EventHandle != SyncPoints.end()) { + if (auto EventHandle = CommandBuffer->SyncPoints.find(SyncPointWaitList[i]); + EventHandle != CommandBuffer->SyncPoints.end()) { ZeEventList.push_back(EventHandle->second->ZeEvent); } else { return UR_RESULT_ERROR_INVALID_VALUE; @@ -251,27 +303,33 @@ static ur_result_t enqueueCommandBufferMemCopyHelper( void *Dst, const void *Src, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, ur_exp_command_buffer_sync_point_t *SyncPoint) { - std::vector ZeEventList; - UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, - SyncPointWaitList, ZeEventList)); - - ur_event_handle_t LaunchEvent; - UR_CALL( - EventCreate(CommandBuffer->Context, nullptr, false, false, &LaunchEvent)); - LaunchEvent->CommandType = CommandType; + if (CommandBuffer->IsInOrderCmdList) { + ZE2UR_CALL( + zeCommandListAppendMemoryCopy, + (CommandBuffer->ZeCommandList, Dst, Src, Size, nullptr, 0, nullptr)); - // Get sync point and register the event with it. 
- *SyncPoint = CommandBuffer->GetNextSyncPoint(); - CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + logger::debug("calling zeCommandListAppendMemoryCopy()"); + } else { + std::vector ZeEventList; + ur_event_handle_t LaunchEvent; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, false, + !CommandBuffer->IsProfilingEnabled, &LaunchEvent)); + LaunchEvent->CommandType = CommandType; - ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (CommandBuffer->ZeCommandList, Dst, Src, Size, - LaunchEvent->ZeEvent, ZeEventList.size(), ZeEventList.data())); + // Get sync point and register the event with it. + *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); - logger::debug("calling zeCommandListAppendMemoryCopy() with" - " ZeEvent {}", - ur_cast(LaunchEvent->ZeEvent)); + ZE2UR_CALL(zeCommandListAppendMemoryCopy, + (CommandBuffer->ZeCommandList, Dst, Src, Size, + LaunchEvent->ZeEvent, ZeEventList.size(), ZeEventList.data())); + logger::debug("calling zeCommandListAppendMemoryCopy() with" + " ZeEvent {}", + ur_cast(LaunchEvent->ZeEvent)); + } return UR_RESULT_SUCCESS; } @@ -317,27 +375,36 @@ static ur_result_t enqueueCommandBufferMemCopyRectHelper( const ze_copy_region_t ZeDstRegion = {DstOriginX, DstOriginY, DstOriginZ, Width, Height, Depth}; - std::vector ZeEventList; - UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, - SyncPointWaitList, ZeEventList)); + if (CommandBuffer->IsInOrderCmdList) { + ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, + (CommandBuffer->ZeCommandList, Dst, &ZeDstRegion, DstPitch, + DstSlicePitch, Src, &ZeSrcRegion, SrcPitch, SrcSlicePitch, + nullptr, 0, nullptr)); - ur_event_handle_t LaunchEvent; - UR_CALL( - EventCreate(CommandBuffer->Context, nullptr, false, false, &LaunchEvent)); - LaunchEvent->CommandType = CommandType; + logger::debug("calling 
zeCommandListAppendMemoryCopyRegion()"); + } else { + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, false, + !CommandBuffer->IsProfilingEnabled, &LaunchEvent)); + LaunchEvent->CommandType = CommandType; - // Get sync point and register the event with it. - *SyncPoint = CommandBuffer->GetNextSyncPoint(); - CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + // Get sync point and register the event with it. + *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); - ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (CommandBuffer->ZeCommandList, Dst, &ZeDstRegion, DstPitch, - DstSlicePitch, Src, &ZeSrcRegion, SrcPitch, SrcSlicePitch, - LaunchEvent->ZeEvent, ZeEventList.size(), ZeEventList.data())); + ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, + (CommandBuffer->ZeCommandList, Dst, &ZeDstRegion, DstPitch, + DstSlicePitch, Src, &ZeSrcRegion, SrcPitch, SrcSlicePitch, + LaunchEvent->ZeEvent, ZeEventList.size(), ZeEventList.data())); - logger::debug("calling zeCommandListAppendMemoryCopyRegion() with" - " ZeEvent {}", - ur_cast(LaunchEvent->ZeEvent)); + logger::debug("calling zeCommandListAppendMemoryCopyRegion() with" + " ZeEvent {}", + ur_cast(LaunchEvent->ZeEvent)); + } return UR_RESULT_SUCCESS; } @@ -361,26 +428,34 @@ static ur_result_t enqueueCommandBufferFillHelper( .ZeProperties.maxMemoryFillPatternSize, UR_RESULT_ERROR_INVALID_VALUE); - std::vector ZeEventList; - UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, - SyncPointWaitList, ZeEventList)); + if (CommandBuffer->IsInOrderCmdList) { + ZE2UR_CALL(zeCommandListAppendMemoryFill, + (CommandBuffer->ZeCommandList, Ptr, Pattern, PatternSize, Size, + nullptr, 0, nullptr)); + + logger::debug("calling zeCommandListAppendMemoryFill()"); + } else { + 
std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); - ur_event_handle_t LaunchEvent; - UR_CALL( - EventCreate(CommandBuffer->Context, nullptr, false, true, &LaunchEvent)); - LaunchEvent->CommandType = CommandType; + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, true, + !CommandBuffer->IsProfilingEnabled, &LaunchEvent)); + LaunchEvent->CommandType = CommandType; - // Get sync point and register the event with it. - *SyncPoint = CommandBuffer->GetNextSyncPoint(); - CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + // Get sync point and register the event with it. + *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); - ZE2UR_CALL(zeCommandListAppendMemoryFill, - (CommandBuffer->ZeCommandList, Ptr, Pattern, PatternSize, Size, - LaunchEvent->ZeEvent, ZeEventList.size(), ZeEventList.data())); + ZE2UR_CALL(zeCommandListAppendMemoryFill, + (CommandBuffer->ZeCommandList, Ptr, Pattern, PatternSize, Size, + LaunchEvent->ZeEvent, ZeEventList.size(), ZeEventList.data())); - logger::debug("calling zeCommandListAppendMemoryFill() with" - " ZeEvent {}", - ur_cast(LaunchEvent->ZeEvent)); + logger::debug("calling zeCommandListAppendMemoryFill() with" + " ZeEvent {}", + ur_cast(LaunchEvent->ZeEvent)); + } return UR_RESULT_SUCCESS; } @@ -389,6 +464,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, const ur_exp_command_buffer_desc_t *CommandBufferDesc, ur_exp_command_buffer_handle_t *CommandBuffer) { + // In-order command-lists are not available in old driver version. + bool CompatibleDriver = IsDriverVersionNewerOrSimilar(Context, 1, 3, 28454); + const bool IsInOrder = + CompatibleDriver + ? (CommandBufferDesc ? CommandBufferDesc->isInOrder : false) + : false; + // Force compute queue type for now. 
Copy engine types may be better suited // for host to device copies. uint32_t QueueGroupOrdinal = @@ -404,10 +486,11 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, (Context->ZeContext, Device->ZeDevice, &ZeCommandListDesc, &ZeCommandListResetEvents)); - // Dependencies between commands are explicitly enforced by sync points when - // enqueuing. Consequently, relax the command ordering in the command list - // can enable the backend to further optimize the workload - ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING; + // For non-linear graph, dependencies between commands are explicitly enforced + // by sync points when enqueuing. Consequently, relaxing the command ordering + // in the command list can enable the backend to further optimize the workload + ZeCommandListDesc.flags = IsInOrder ? ZE_COMMAND_LIST_FLAG_IN_ORDER + : ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING; ZeStruct ZeMutableCommandListDesc; if (CommandBufferDesc && CommandBufferDesc->isUpdatable) { @@ -423,7 +506,7 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, try { *CommandBuffer = new ur_exp_command_buffer_handle_t_( Context, Device, ZeCommandList, ZeCommandListResetEvents, - ZeCommandListDesc, CommandBufferDesc); + ZeCommandListDesc, CommandBufferDesc, IsInOrder); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -434,10 +517,13 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, // on command-buffer enqueue. 
auto RetCommandBuffer = *CommandBuffer; UR_CALL(EventCreate(Context, nullptr, false, false, + !RetCommandBuffer->IsProfilingEnabled, &RetCommandBuffer->SignalEvent)); UR_CALL(EventCreate(Context, nullptr, false, false, + !RetCommandBuffer->IsProfilingEnabled, &RetCommandBuffer->WaitEvent)); UR_CALL(EventCreate(Context, nullptr, false, false, + !RetCommandBuffer->IsProfilingEnabled, &RetCommandBuffer->AllResetEvent)); // Add prefix commands @@ -489,11 +575,24 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { (CommandBuffer->ZeCommandListResetEvents, CommandBuffer->AllResetEvent->ZeEvent)); - // Wait for all the user added commands to complete, and signal the - // command-buffer signal-event when they are done. - ZE2UR_CALL(zeCommandListAppendBarrier, - (CommandBuffer->ZeCommandList, CommandBuffer->SignalEvent->ZeEvent, - NumEvents, CommandBuffer->ZeEventsList.data())); + if (CommandBuffer->IsInOrderCmdList) { + ZE2UR_CALL( + zeCommandListAppendSignalEvent, + (CommandBuffer->ZeCommandList, CommandBuffer->SignalEvent->ZeEvent)); + } else { + // Create a list of events for our signal event to wait on + const size_t NumEvents = CommandBuffer->SyncPoints.size(); + std::vector WaitEventList{NumEvents}; + for (size_t i = 0; i < NumEvents; i++) { + WaitEventList[i] = CommandBuffer->SyncPoints[i]->ZeEvent; + } + + // Wait for all the user added commands to complete, and signal the + // command-buffer signal-event when they are done. + ZE2UR_CALL(zeCommandListAppendBarrier, (CommandBuffer->ZeCommandList, + CommandBuffer->SignalEvent->ZeEvent, + NumEvents, WaitEventList.data())); + } // Close the command lists and have them ready for dispatch. 
ZE2UR_CALL(zeCommandListClose, (CommandBuffer->ZeCommandList)); @@ -551,21 +650,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ZE2UR_CALL(zeKernelSetGroupSize, (Kernel->ZeKernel, WG[0], WG[1], WG[2])); - std::vector ZeEventList; - UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, - SyncPointWaitList, ZeEventList)); - ur_event_handle_t LaunchEvent; - UR_CALL( - EventCreate(CommandBuffer->Context, nullptr, false, false, &LaunchEvent)); - LaunchEvent->CommandType = UR_COMMAND_KERNEL_LAUNCH; - - if (SyncPoint) { - // Get sync point and register the event with it. - *SyncPoint = CommandBuffer->GetNextSyncPoint(); - CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); - } - - LaunchEvent->CommandData = (void *)Kernel; + CommandBuffer->KernelsList.push_back(Kernel); // Increment the reference count of the Kernel and indicate that the Kernel // is in use. Once the event has been signaled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the @@ -601,14 +686,36 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_UNKNOWN; } - ZE2UR_CALL(zeCommandListAppendLaunchKernel, - (CommandBuffer->ZeCommandList, Kernel->ZeKernel, - &ZeThreadGroupDimensions, LaunchEvent->ZeEvent, - ZeEventList.size(), ZeEventList.data())); + if (CommandBuffer->IsInOrderCmdList) { + ZE2UR_CALL(zeCommandListAppendLaunchKernel, + (CommandBuffer->ZeCommandList, Kernel->ZeKernel, + &ZeThreadGroupDimensions, nullptr, 0, nullptr)); - logger::debug("calling zeCommandListAppendLaunchKernel() with" - " ZeEvent {}", - ur_cast(LaunchEvent->ZeEvent)); + logger::debug("calling zeCommandListAppendLaunchKernel()"); + } else { + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, false, + 
!CommandBuffer->IsProfilingEnabled, &LaunchEvent)); + LaunchEvent->CommandType = UR_COMMAND_KERNEL_LAUNCH; + + if (SyncPoint) { + // Get sync point and register the event with it. + *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + } + + ZE2UR_CALL(zeCommandListAppendLaunchKernel, + (CommandBuffer->ZeCommandList, Kernel->ZeKernel, + &ZeThreadGroupDimensions, LaunchEvent->ZeEvent, + ZeEventList.size(), ZeEventList.data())); + + logger::debug("calling zeCommandListAppendLaunchKernel() with" + " ZeEvent {}", + ur_cast(LaunchEvent->ZeEvent)); + } return UR_RESULT_SUCCESS; } @@ -758,34 +865,41 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_sync_point_t *SyncPoint) { std::ignore = Flags; - std::vector ZeEventList; - UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, - SyncPointWaitList, ZeEventList)); - - if (NumSyncPointsInWaitList) { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (CommandBuffer->ZeCommandList, NumSyncPointsInWaitList, - ZeEventList.data())); - } + if (CommandBuffer->IsInOrderCmdList) { + // Add the prefetch command to the command buffer. + // Note that L0 does not handle migration flags. 
+ ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, + (CommandBuffer->ZeCommandList, Mem, Size)); + } else { + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + + if (NumSyncPointsInWaitList) { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandBuffer->ZeCommandList, NumSyncPointsInWaitList, + ZeEventList.data())); + } - ur_event_handle_t LaunchEvent; - UR_CALL( - EventCreate(CommandBuffer->Context, nullptr, false, true, &LaunchEvent)); - LaunchEvent->CommandType = UR_COMMAND_USM_PREFETCH; + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, true, + !CommandBuffer->IsProfilingEnabled, &LaunchEvent)); + LaunchEvent->CommandType = UR_COMMAND_USM_PREFETCH; - // Get sync point and register the event with it. - *SyncPoint = CommandBuffer->GetNextSyncPoint(); - CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + // Get sync point and register the event with it. + *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); - // Add the prefetch command to the command buffer. - // Note that L0 does not handle migration flags. - ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, - (CommandBuffer->ZeCommandList, Mem, Size)); + // Add the prefetch command to the command buffer. + // Note that L0 does not handle migration flags. + ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, + (CommandBuffer->ZeCommandList, Mem, Size)); - // Level Zero does not have a completion "event" with the prefetch API, - // so manually add command to signal our event. - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); + // Level Zero does not have a completion "event" with the prefetch API, + // so manually add command to signal our event. 
+ ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); + } return UR_RESULT_SUCCESS; } @@ -822,33 +936,39 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ze_memory_advice_t ZeAdvice = static_cast(Value); - std::vector ZeEventList; - UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, - SyncPointWaitList, ZeEventList)); - - if (NumSyncPointsInWaitList) { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (CommandBuffer->ZeCommandList, NumSyncPointsInWaitList, - ZeEventList.data())); - } + if (CommandBuffer->IsInOrderCmdList) { + ZE2UR_CALL(zeCommandListAppendMemAdvise, + (CommandBuffer->ZeCommandList, CommandBuffer->Device->ZeDevice, + Mem, Size, ZeAdvice)); + } else { + std::vector ZeEventList; + UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, + SyncPointWaitList, ZeEventList)); + + if (NumSyncPointsInWaitList) { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandBuffer->ZeCommandList, NumSyncPointsInWaitList, + ZeEventList.data())); + } - ur_event_handle_t LaunchEvent; - UR_CALL( - EventCreate(CommandBuffer->Context, nullptr, false, true, &LaunchEvent)); - LaunchEvent->CommandType = UR_COMMAND_USM_ADVISE; + ur_event_handle_t LaunchEvent; + UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, true, + !CommandBuffer->IsProfilingEnabled, &LaunchEvent)); + LaunchEvent->CommandType = UR_COMMAND_USM_ADVISE; - // Get sync point and register the event with it. - *SyncPoint = CommandBuffer->GetNextSyncPoint(); - CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); + // Get sync point and register the event with it. 
+ *SyncPoint = CommandBuffer->GetNextSyncPoint(); + CommandBuffer->RegisterSyncPoint(*SyncPoint, LaunchEvent); - ZE2UR_CALL(zeCommandListAppendMemAdvise, - (CommandBuffer->ZeCommandList, CommandBuffer->Device->ZeDevice, - Mem, Size, ZeAdvice)); + ZE2UR_CALL(zeCommandListAppendMemAdvise, + (CommandBuffer->ZeCommandList, CommandBuffer->Device->ZeDevice, + Mem, Size, ZeAdvice)); - // Level Zero does not have a completion "event" with the advise API, - // so manually add command to signal our event. - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); + // Level Zero does not have a completion "event" with the advise API, + // so manually add command to signal our event. + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandBuffer->ZeCommandList, LaunchEvent->ZeEvent)); + } return UR_RESULT_SUCCESS; } @@ -973,7 +1093,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( Queue, &RetEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP, SignalCommandList, false, false, true)); - if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) { + if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) && + (!CommandBuffer->IsInOrderCmdList) && + (CommandBuffer->IsProfilingEnabled)) { // Multiple submissions of a command buffer implies that we need to save // the event timestamps before resubmiting the command buffer. 
We // therefore copy the these timestamps in a dedicated USM memory section diff --git a/source/adapters/level_zero/command_buffer.hpp b/source/adapters/level_zero/command_buffer.hpp index f9a9288712..04d6a7d269 100644 --- a/source/adapters/level_zero/command_buffer.hpp +++ b/source/adapters/level_zero/command_buffer.hpp @@ -30,7 +30,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { ze_command_list_handle_t CommandList, ze_command_list_handle_t CommandListResetEvents, ZeStruct ZeDesc, - const ur_exp_command_buffer_desc_t *Desc); + const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList); ~ur_exp_command_buffer_handle_t_(); @@ -82,6 +82,13 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { bool IsUpdatable = false; // Indicates if command buffer was finalized. bool IsFinalized = false; + // Command-buffer profiling is enabled. + bool IsProfilingEnabled = false; + // Command-buffer can be submitted to an in-order command-list. + bool IsInOrderCmdList = false; + // This list is needed to release all kernels retained by the + // command_buffer. + std::vector KernelsList; }; struct ur_exp_command_buffer_command_handle_t_ : public _ur_object { diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 97ffe2f19e..ae1d1b7fa5 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -763,7 +763,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( ur_event_handle_t *Event ///< [out] pointer to the handle of the event object created. 
) { - UR_CALL(EventCreate(Context, nullptr, false, true, Event)); + UR_CALL(EventCreate(Context, nullptr, false, true, false, Event)); (*Event)->RefCountExternal++; ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent)); @@ -781,7 +781,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( // we dont have urEventCreate, so use this check for now to know that // the call comes from urEventCreate() if (NativeEvent == nullptr) { - UR_CALL(EventCreate(Context, nullptr, false, true, Event)); + UR_CALL(EventCreate(Context, nullptr, false, true, false, Event)); (*Event)->RefCountExternal++; ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent)); @@ -1061,9 +1061,11 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool IsMultiDevice, bool HostVisible, + bool ForceDisableProfiling, ur_event_handle_t *RetEvent) { - bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled(); + bool ProfilingEnabled = + ForceDisableProfiling ? 
false : (!Queue || Queue->isProfilingEnabled()); ur_device_handle_t Device = nullptr; diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index a566c77825..80a1ca80f6 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -31,6 +31,7 @@ extern "C" { ur_result_t urEventReleaseInternal(ur_event_handle_t Event); ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool IsMultiDevice, bool HostVisible, + bool ForceDisableProfiling, ur_event_handle_t *RetEvent); } // extern "C" diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index a5e1066a23..1e88f662f8 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -104,7 +104,8 @@ ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue, assert(st == ACCUMULATING); if (!barrierEvent) { - UR_CALL(EventCreate(queue->Context, queue, false, true, &barrierEvent)); + UR_CALL( + EventCreate(queue->Context, queue, false, true, false, &barrierEvent)); } // Instead of collecting all the batched events, we simply issue a global @@ -1754,7 +1755,7 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, if (*Event == nullptr) UR_CALL(EventCreate(Queue->Context, Queue, IsMultiDevice, - HostVisible.value(), Event)); + HostVisible.value(), false, Event)); (*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType; diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index 9a0f8aec58..b6e001aa84 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -4811,7 +4811,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. ur_exp_command_buffer_command_handle_t @@ -4852,7 +4853,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -4887,7 +4889,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) try { @@ -4922,7 +4925,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -4957,7 +4961,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -4991,7 +4996,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -5033,7 +5039,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. 
ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -5082,7 +5089,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -5129,7 +5137,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -5166,7 +5175,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. 
) try { @@ -5199,7 +5209,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) try { @@ -5232,7 +5243,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) try { diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index f097238036..9c1ff017ab 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5210,7 +5210,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. 
ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. ur_exp_command_buffer_command_handle_t @@ -5261,7 +5262,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -5303,7 +5305,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -5346,7 +5349,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { @@ -5395,7 +5399,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -5442,7 +5447,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -5497,7 +5503,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { @@ -5565,7 +5572,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -5631,7 +5639,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -5687,7 +5696,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. 
) { @@ -5734,7 +5744,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -5779,7 +5790,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 8483b6a879..d75a99fa82 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -7822,7 +7822,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. 
ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. ur_exp_command_buffer_command_handle_t @@ -7885,7 +7886,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -7942,7 +7944,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -8011,7 +8014,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { @@ -8074,7 +8078,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -8131,7 +8136,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -8196,7 +8202,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { @@ -8273,7 +8280,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -8343,7 +8351,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -8403,7 +8412,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. 
) { @@ -8459,7 +8469,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -8514,7 +8525,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 9e61ca7227..aedd737fea 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -6711,7 +6711,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. 
ur_exp_command_buffer_command_handle_t @@ -6773,7 +6774,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -6815,7 +6817,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -6857,7 +6860,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { @@ -6905,7 +6909,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -6949,7 +6954,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -7001,7 +7007,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { @@ -7063,7 +7070,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -7120,7 +7128,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -7167,7 +7176,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. 
) { @@ -7210,7 +7220,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -7250,7 +7261,8 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 7f5736663f..1fac5999e7 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7329,7 +7329,8 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. 
ur_exp_command_buffer_command_handle_t @@ -7382,7 +7383,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -7437,7 +7439,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) try { @@ -7485,7 +7488,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -7534,7 +7538,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -7582,7 +7587,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -7637,7 +7643,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -7701,7 +7708,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
+ pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -7763,7 +7771,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) try { @@ -7817,7 +7826,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) try { @@ -7873,7 +7883,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. 
ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) try { @@ -7929,7 +7940,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) try { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index b3b7a6bf92..44f9310622 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6226,7 +6226,8 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint, ///< [out][optional] Sync point associated with this command. ur_exp_command_buffer_command_handle_t @@ -6269,7 +6270,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. 
ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -6315,7 +6317,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -6354,7 +6357,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -6394,7 +6398,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. 
) { @@ -6433,7 +6438,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -6479,7 +6485,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -6532,7 +6539,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -6583,7 +6591,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. 
const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] Sync point associated with this command. ) { @@ -6626,7 +6635,8 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -6673,7 +6683,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) { @@ -6720,7 +6731,8 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * - pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. + pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. 
May + ///< be ignored if command-buffer is in-order. ur_exp_command_buffer_sync_point_t * pSyncPoint ///< [out][optional] sync point associated with this command. ) {