Skip to content

Commit

Permalink
[L0] Add support for in-order lists using L0 driver
Browse files Browse the repository at this point in the history
Signed-off-by: Raiyan Latif <raiyan.latif@intel.com>
  • Loading branch information
raiyanla committed Mar 1, 2024
1 parent 8499b57 commit 862364b
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 4 deletions.
16 changes: 16 additions & 0 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,22 @@ bool ur_device_handle_t_::useRelaxedAllocationLimits() {
return EnableRelaxedAllocationLimits;
}

// Reports whether the adapter should use the in-order lists implementation
// from the L0 driver instead of the adapter's own implementation.
//
// The answer is computed once, on the first call, and cached in a
// function-local static for all later calls. It is controlled by the
// UR_L0_USE_DRIVER_INORDER_LISTS environment variable:
//   - unset             -> enabled (temporary default while CI results are
//                          being evaluated);
//   - parses to 0       -> disabled (text std::atoi cannot parse yields 0);
//   - any other integer -> enabled.
bool ur_device_handle_t_::useDriverInOrderLists() {
  static const bool UseDriverInOrderLists = [] {
    const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS");
    // Temporarily enabled by default to test CI results. The environment
    // variable is consulted first so that the feature can still be switched
    // off explicitly; previously an unconditional early return made this
    // lookup unreachable.
    if (!UrRet)
      return true;
    return std::atoi(UrRet) != 0;
  }();

  return UseDriverInOrderLists;
}

ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
int SubSubDeviceIndex) {
// Maintain various device properties cache.
Expand Down
3 changes: 3 additions & 0 deletions source/adapters/level_zero/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ struct ur_device_handle_t_ : _ur_object {
// Read env settings to select immediate commandlist mode.
ImmCmdlistMode useImmediateCommandLists();

// Whether Adapter uses driver's implementation of in-order lists or not
bool useDriverInOrderLists();

// Returns whether immediate command lists are used on this device.
ImmCmdlistMode ImmCommandListUsed{};

Expand Down
40 changes: 40 additions & 0 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,27 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
CurQueue->LastCommandEvent && CurQueue->LastCommandEvent->IsDiscarded)
IncludeLastCommandEvent = false;


// If we are using L0 native implementation for handling in-order queues,
// then we don't need to add the last enqueued event into the waitlist, as
// the native driver implementation will already ensure in-order semantics.
// The only exception is when a different immediate command list was last
// used on the same UR Queue.
if (CurQueue->Device->useDriverInOrderLists()) {
if (CurQueue->UsingImmCmdLists) {
auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine);
uint32_t QueueGroupOrdinal, QueueIndex;
auto NextIndex = QueueGroup.getQueueIndex(&QueueGroupOrdinal, &QueueIndex,
/*QueryOnly */ true);
auto NextImmCmdList = QueueGroup.ImmCmdLists[NextIndex];
IncludeLastCommandEvent &=
CurQueue->LastUsedCommandList != CurQueue->CommandListMap.end() &&
NextImmCmdList != CurQueue->LastUsedCommandList;
} else {
IncludeLastCommandEvent = false;
}
}

try {
uint32_t TmpListLength = 0;

Expand All @@ -1205,6 +1226,25 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
this->UrEventList = new ur_event_handle_t[EventListLength];
}

// Returns true when the wait-list is empty or when every event in it has
// CurQueue as its UrQueue; returns false at the first event whose UrQueue
// differs.
auto WaitListEmptyOrAllEventsFromSameQueue = [CurQueue, EventListLength,
                                              EventList]() {
  for (uint32_t Idx = 0; Idx != EventListLength; ++Idx) {
    const bool FromThisQueue = (EventList[Idx]->UrQueue == CurQueue);
    if (!FromThisQueue)
      return false;
  }
  // Reached both for an empty wait-list (the loop body never runs) and for a
  // wait-list in which every event matched.
  return true;
};

// For an in-order queue whose wait-list is empty or contains only events from
// the same queue, we don't need to wait on any additional events.
if (CurQueue->Device->useDriverInOrderLists() &&
CurQueue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue()) {
this->Length = TmpListLength;
return UR_RESULT_SUCCESS;
}

if (EventListLength > 0) {
for (uint32_t I = 0; I < EventListLength; I++) {
{
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
// the code can do a urKernelRelease on this kernel.
(*Event)->CommandData = (void *)Kernel;

// Increment the reference count of the Kernel and indicate that the Kernel is
// in use. Once the event has been signalled, the code in
// Increment the reference count of the Kernel and indicate that the Kernel
// is in use. Once the event has been signalled, the code in
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
// reference count on the kernel, using the kernel saved in CommandData.
UR_CALL(urKernelRetain(Kernel));
Expand Down
10 changes: 8 additions & 2 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1870,6 +1870,10 @@ ur_result_t ur_queue_handle_t_::createCommandList(
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;

if (Device->useDriverInOrderLists() && isInOrderQueue()) {
ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
}

ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
&ZeCommandListDesc, &ZeCommandList));

Expand Down Expand Up @@ -1983,8 +1987,10 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() {
Priority = "High";
}

// Evaluate performance of explicit usage for "0" index.
if (QueueIndex != 0) {
if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) {
ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_IN_ORDER;
} else if (QueueIndex != 0) {
// Evaluate performance of explicit usage for "0" index.
ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY;
}

Expand Down

0 comments on commit 862364b

Please sign in to comment.