From 27829e745002d42b707911b99b51e94ebfcd0979 Mon Sep 17 00:00:00 2001 From: Raiyan Latif Date: Thu, 29 Feb 2024 05:32:45 -0800 Subject: [PATCH] [L0] Add support for in-order lists using L0 driver Signed-off-by: Raiyan Latif --- source/adapters/level_zero/device.cpp | 16 +++++++++++ source/adapters/level_zero/device.hpp | 3 +++ source/adapters/level_zero/event.cpp | 38 +++++++++++++++++++++++++++ source/adapters/level_zero/kernel.cpp | 4 +-- source/adapters/level_zero/queue.cpp | 11 ++++++-- 5 files changed, 68 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 918b04400a..80eabcd0ff 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1029,6 +1029,22 @@ bool ur_device_handle_t_::useRelaxedAllocationLimits() { return EnableRelaxedAllocationLimits; } +bool ur_device_handle_t_::useDriverInOrderLists() { + // Use in-order lists implementation from L0 driver instead + // of adapter's implementation. + static const bool UseDriverInOrderLists = [] { + // Temporarily enabled by default to test CI results. Set + // UR_L0_USE_DRIVER_INORDER_LISTS=0 to fall back to the adapter's + // implementation. + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); + if (!UrRet) + return true; + return std::atoi(UrRet) != 0; + }(); + + return UseDriverInOrderLists; +} + ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { // Maintain various device properties cache. diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 94480336c5..a57a97d38d 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -143,6 +143,9 @@ struct ur_device_handle_t_ : _ur_object { // Read env settings to select immediate commandlist mode. 
ImmCmdlistMode useImmediateCommandLists(); + // Whether Adapter uses driver's implementation of in-order lists or not + bool useDriverInOrderLists(); + // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 57b839a714..09aed3748a 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -1189,6 +1189,23 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( CurQueue->LastCommandEvent && CurQueue->LastCommandEvent->IsDiscarded) IncludeLastCommandEvent = false; + if (CurQueue->Device->useDriverInOrderLists() && CurQueue->UsingImmCmdLists) { + auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine); + uint32_t QueueGroupOrdinal, QueueIndex; + auto NextIndex = QueueGroup.getQueueIndex(&QueueGroupOrdinal, &QueueIndex, + /*QueryOnly */ true); + auto NextImmCmdList = QueueGroup.ImmCmdLists[NextIndex]; + + // If we are using L0 native implementation for handling in-order queues, + // then we don't need to add the last enqueued event into the waitlist, as + // the native driver implementation will already ensure in-order semantics. + // The only exception is when a different immediate command list was last + // used on the same UR Queue. 
+ IncludeLastCommandEvent &= + CurQueue->LastUsedCommandList != CurQueue->CommandListMap.end() && + NextImmCmdList != CurQueue->LastUsedCommandList; + } + try { uint32_t TmpListLength = 0; @@ -1205,6 +1222,27 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( this->UrEventList = new ur_event_handle_t[EventListLength]; } + auto WaitListEmptyOrAllEventsFromSameQueue = [CurQueue, EventListLength, + EventList]() { + if (!EventListLength) + return true; + + for (uint32_t i = 0; i < EventListLength; ++i) { + if (CurQueue != EventList[i]->UrQueue) + return false; + } + + return true; + }; + + // For an in-order queue whose wait-list is empty or has events only from + // the same queue, we don't need to wait on any other additional events. + if (CurQueue->Device->useDriverInOrderLists() && + CurQueue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue()) { + this->Length = TmpListLength; + return UR_RESULT_SUCCESS; + } + if (EventListLength > 0) { for (uint32_t I = 0; I < EventListLength; I++) { { diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 0e5ce3215a..c40e4ef0e3 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -214,8 +214,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // the code can do a urKernelRelease on this kernel. (*Event)->CommandData = (void *)Kernel; - // Increment the reference count of the Kernel and indicate that the Kernel is - // in use. Once the event has been signalled, the code in + // Increment the reference count of the Kernel and indicate that the Kernel + // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. 
UR_CALL(urKernelRetain(Kernel)); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 8a9f36a432..9ed1330d42 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1893,6 +1893,10 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZeStruct ZeCommandListDesc; ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal; + if (Device->useDriverInOrderLists() && isInOrderQueue()) { + ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER; + } + ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice, &ZeCommandListDesc, &ZeCommandList)); @@ -2006,8 +2010,11 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { Priority = "High"; } - // Evaluate performance of explicit usage for "0" index. - if (QueueIndex != 0) { + if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_IN_ORDER; + urPrint("Using in-order driver implementation\n"); + } else if (QueueIndex != 0) { + // Evaluate performance of explicit usage for "0" index. ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY; }