diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 918b04400a..f19cce1000 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1029,6 +1029,19 @@ bool ur_device_handle_t_::useRelaxedAllocationLimits() { return EnableRelaxedAllocationLimits; } +bool ur_device_handle_t_::useDriverInOrderLists() { + // Use in-order lists implementation from L0 driver instead + // of adapter's implementation. + static const bool UseDriverInOrderLists = [] { + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); + if (!UrRet) + return false; + return std::atoi(UrRet) != 0; + }(); + + return UseDriverInOrderLists; +} + ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { // Maintain various device properties cache. diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 94480336c5..a57a97d38d 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -143,6 +143,9 @@ struct ur_device_handle_t_ : _ur_object { // Read env settings to select immediate commandlist mode. ImmCmdlistMode useImmediateCommandLists(); + // Whether Adapter uses driver's implementation of in-order lists or not + bool useDriverInOrderLists(); + // Returns whether immediate command lists are used on this device. ImmCmdlistMode ImmCommandListUsed{}; diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 0e5ce3215a..07eb4904e6 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -191,9 +191,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ZE2UR_CALL(zeKernelSetGroupSize, (ZeKernel, WG[0], WG[1], WG[2])); bool UseCopyEngine = false; + + bool UseTmpWaitList = false; + if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + for (uint32_t i = 0; i < NumEventsInWaitList; i++) { + auto EventQueue = EventWaitList[i]->UrQueue; + if (Queue != EventQueue) { + UseTmpWaitList = true; + break; + } + } + } else { + UseTmpWaitList = true; + } + _ur_ze_event_list_t TmpWaitList; - UR_CALL(TmpWaitList.createAndRetainUrZeEventList( - NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine)); + TmpWaitList.ZeEventList = nullptr; + TmpWaitList.Length = 0; + + if (UseTmpWaitList) { + UR_CALL(TmpWaitList.createAndRetainUrZeEventList( + NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine)); + } // Get a new command list to be used on this call ur_command_list_ptr_t CommandList{}; @@ -208,14 +227,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_KERNEL_LAUNCH, CommandList, IsInternal, false)); ZeEvent = (*Event)->ZeEvent; + + // If UseTmpWaitList is false, then it will leave TmpWaitList.ZeEventList as + // nullptr and TmpWaitList.Length as 0 (i.e. an empty WaitList) (*Event)->WaitList = TmpWaitList; // Save the kernel in the event, so that when the event is signalled // the code can do a urKernelRelease on this kernel. (*Event)->CommandData = (void *)Kernel; - // Increment the reference count of the Kernel and indicate that the Kernel is - // in use. Once the event has been signalled, the code in + // Increment the reference count of the Kernel and indicate that the Kernel + // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. UR_CALL(urKernelRetain(Kernel)); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 8a9f36a432..9ed1330d42 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1893,6 +1893,10 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZeStruct ZeCommandListDesc; ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal; + if (Device->useDriverInOrderLists() && isInOrderQueue()) { + ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER; + } + ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice, &ZeCommandListDesc, &ZeCommandList)); @@ -2006,8 +2010,11 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { Priority = "High"; } - // Evaluate performance of explicit usage for "0" index. - if (QueueIndex != 0) { + if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_IN_ORDER; + urPrint("Using in-order driver implementation\n"); + } else if (QueueIndex != 0) { + // Evaluate performance of explicit usage for "0" index. ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY; }