Skip to content

Commit

Permalink
[L0] Add support for in-order lists using L0 driver
Browse files Browse the repository at this point in the history
Signed-off-by: Raiyan Latif <raiyan.latif@intel.com>
  • Loading branch information
raiyanla authored and kbenzie committed Mar 12, 2024
1 parent 252a3cc commit 11ecfd3
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 16 deletions.
13 changes: 13 additions & 0 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,19 @@ bool ur_device_handle_t_::useRelaxedAllocationLimits() {
return EnableRelaxedAllocationLimits;
}

bool ur_device_handle_t_::useDriverInOrderLists() {
  // Selects between the L0 driver's in-order list implementation and the
  // adapter's own one. The choice is controlled by the
  // UR_L0_USE_DRIVER_INORDER_LISTS environment variable: when it is unset the
  // adapter's implementation is kept (returns false); otherwise the value is
  // parsed with std::atoi and any non-zero result enables the driver path.
  // The function-local static means the environment is read only on the
  // first call.
  static const bool DriverInOrderListsEnabled = [] {
    const char *EnvValue = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS");
    return EnvValue != nullptr && std::atoi(EnvValue) != 0;
  }();

  return DriverInOrderListsEnabled;
}

ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
int SubSubDeviceIndex) {
// Maintain various device properties cache.
Expand Down
3 changes: 3 additions & 0 deletions source/adapters/level_zero/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ struct ur_device_handle_t_ : _ur_object {
// Read env settings to select immediate commandlist mode.
ImmCmdlistMode useImmediateCommandLists();

// Whether Adapter uses driver's implementation of in-order lists or not
bool useDriverInOrderLists();

// Returns whether immediate command lists are used on this device.
ImmCmdlistMode ImmCommandListUsed{};

Expand Down
57 changes: 44 additions & 13 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,20 @@ static const bool UseMultipleCmdlistBarriers = [] {
return std::atoi(UseMultipleCmdlistBarriersFlag) > 0;
}();

// Returns true when the wait-list has no entries, or when every event in it
// reports `Queue` as its UrQueue. Returns false as soon as an event belonging
// to a different queue is found.
bool WaitListEmptyOrAllEventsFromSameQueue(
    ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
    const ur_event_handle_t *EventWaitList) {
  // A zero-length wait-list never enters the loop, so it falls through to
  // the `true` result below.
  for (uint32_t Idx = 0; Idx != NumEventsInWaitList; ++Idx) {
    const ur_event_handle_t WaitEvent = EventWaitList[Idx];
    if (WaitEvent->UrQueue != Queue)
      return false;
  }

  return true;
}

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
ur_queue_handle_t Queue, ///< [in] handle of the queue object
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
Expand Down Expand Up @@ -206,21 +220,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
bool IsInternal = OutEvent == nullptr;
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;

auto WaitListEmptyOrAllEventsFromSameQueue = [Queue, NumEventsInWaitList,
EventWaitList]() {
if (!NumEventsInWaitList)
return true;

for (uint32_t I = 0; I < NumEventsInWaitList; ++I)
if (Queue != EventWaitList[I]->UrQueue)
return false;

return true;
};

// For in-order queue and wait-list which is empty or has events from
// the same queue just use the last command event as the barrier event.
if (Queue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue() &&
if (Queue->isInOrderQueue() &&
WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList,
EventWaitList) &&
Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) {
UR_CALL(urEventRetain(Queue->LastCommandEvent));
*Event = Queue->LastCommandEvent;
Expand Down Expand Up @@ -1189,6 +1193,23 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
CurQueue->LastCommandEvent && CurQueue->LastCommandEvent->IsDiscarded)
IncludeLastCommandEvent = false;

// If we are using L0 native implementation for handling in-order queues,
// then we don't need to add the last enqueued event into the waitlist, as
// the native driver implementation will already ensure in-order semantics.
// The only exception is when a different immediate command list was last
// used on the same UR Queue.
if (CurQueue->Device->useDriverInOrderLists() && CurQueue->isInOrderQueue() &&
CurQueue->UsingImmCmdLists) {
auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine);
uint32_t QueueGroupOrdinal, QueueIndex;
auto NextIndex = QueueGroup.getQueueIndex(&QueueGroupOrdinal, &QueueIndex,
/*QueryOnly */ true);
auto NextImmCmdList = QueueGroup.ImmCmdLists[NextIndex];
IncludeLastCommandEvent &=
CurQueue->LastUsedCommandList != CurQueue->CommandListMap.end() &&
NextImmCmdList != CurQueue->LastUsedCommandList;
}

try {
uint32_t TmpListLength = 0;

Expand All @@ -1205,6 +1226,16 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
this->UrEventList = new ur_event_handle_t[EventListLength];
}

// For an in-order queue whose wait-list is empty or contains only events from
// the same queue, we don't need to wait on any additional events.
if (CurQueue->Device->useDriverInOrderLists() &&
CurQueue->isInOrderQueue() &&
WaitListEmptyOrAllEventsFromSameQueue(CurQueue, EventListLength,
EventList)) {
this->Length = TmpListLength;
return UR_RESULT_SUCCESS;
}

if (EventListLength > 0) {
for (uint32_t I = 0; I < EventListLength; I++) {
{
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
// the code can do a urKernelRelease on this kernel.
(*Event)->CommandData = (void *)Kernel;

// Increment the reference count of the Kernel and indicate that the Kernel is
// in use. Once the event has been signalled, the code in
// Increment the reference count of the Kernel and indicate that the Kernel
// is in use. Once the event has been signalled, the code in
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
// reference count on the kernel, using the kernel saved in CommandData.
UR_CALL(urKernelRetain(Kernel));
Expand Down
10 changes: 9 additions & 1 deletion source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1870,6 +1870,10 @@ ur_result_t ur_queue_handle_t_::createCommandList(
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;

if (Device->useDriverInOrderLists() && isInOrderQueue()) {
ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
}

ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
&ZeCommandListDesc, &ZeCommandList));

Expand Down Expand Up @@ -1985,7 +1989,11 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() {

// Evaluate performance of explicit usage for "0" index.
if (QueueIndex != 0) {
ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY;
ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY;
}

if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) {
ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER;
}

// Check if context's command list cache has an immediate command list with
Expand Down

0 comments on commit 11ecfd3

Please sign in to comment.