Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[L0] Add support for in-order lists using L0 driver #1372

Merged
merged 1 commit into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,19 @@ bool ur_device_handle_t_::useRelaxedAllocationLimits() {
return EnableRelaxedAllocationLimits;
}

bool ur_device_handle_t_::useDriverInOrderLists() {
  // Reports whether in-order lists should be handled by the L0 driver's own
  // implementation rather than by the adapter's.
  //
  // The answer comes from the UR_L0_USE_DRIVER_INORDER_LISTS environment
  // variable: unset means false, otherwise the value is parsed with atoi and
  // any non-zero result means true. The variable is read only once, when the
  // function-local static below is initialized.
  static const bool DriverInOrderListsEnabled = [] {
    if (const char *EnvValue = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"))
      return std::atoi(EnvValue) != 0;
    return false;
  }();

  return DriverInOrderListsEnabled;
}

ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
int SubSubDeviceIndex) {
// Maintain various device properties cache.
Expand Down
3 changes: 3 additions & 0 deletions source/adapters/level_zero/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ struct ur_device_handle_t_ : _ur_object {
// Read env settings to select immediate commandlist mode.
ImmCmdlistMode useImmediateCommandLists();

// Whether the adapter uses the L0 driver's implementation of in-order lists
// instead of its own. Controlled by the UR_L0_USE_DRIVER_INORDER_LISTS
// environment variable: returns true only when it is set to a non-zero
// integer; returns false when it is unset or parses to zero.
bool useDriverInOrderLists();

// Returns whether immediate command lists are used on this device.
ImmCmdlistMode ImmCommandListUsed{};

Expand Down
57 changes: 44 additions & 13 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,20 @@ static const bool UseMultipleCmdlistBarriers = [] {
return std::atoi(UseMultipleCmdlistBarriersFlag) > 0;
}();

// Returns true when the wait-list is empty, or when the UrQueue of every
// event in EventWaitList[0 .. NumEventsInWaitList) is Queue. Returns false as
// soon as an event belonging to a different queue is found.
bool WaitListEmptyOrAllEventsFromSameQueue(
    ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
    const ur_event_handle_t *EventWaitList) {
  // Starts out true so that an empty wait-list is accepted without touching
  // EventWaitList; the loop condition stops scanning at the first mismatch.
  bool AllFromThisQueue = true;
  for (uint32_t Idx = 0; AllFromThisQueue && Idx < NumEventsInWaitList; ++Idx)
    AllFromThisQueue = (EventWaitList[Idx]->UrQueue == Queue);

  return AllFromThisQueue;
}

UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
ur_queue_handle_t Queue, ///< [in] handle of the queue object
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
Expand Down Expand Up @@ -206,21 +220,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
bool IsInternal = OutEvent == nullptr;
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;

auto WaitListEmptyOrAllEventsFromSameQueue = [Queue, NumEventsInWaitList,
EventWaitList]() {
if (!NumEventsInWaitList)
return true;

for (uint32_t I = 0; I < NumEventsInWaitList; ++I)
if (Queue != EventWaitList[I]->UrQueue)
return false;

return true;
};

// For in-order queue and wait-list which is empty or has events from
// the same queue just use the last command event as the barrier event.
if (Queue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue() &&
if (Queue->isInOrderQueue() &&
WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList,
EventWaitList) &&
Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) {
UR_CALL(urEventRetain(Queue->LastCommandEvent));
*Event = Queue->LastCommandEvent;
Expand Down Expand Up @@ -1189,6 +1193,23 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
CurQueue->LastCommandEvent && CurQueue->LastCommandEvent->IsDiscarded)
IncludeLastCommandEvent = false;

// If we are using L0 native implementation for handling in-order queues,
// then we don't need to add the last enqueued event into the waitlist, as
// the native driver implementation will already ensure in-order semantics.
// The only exception is when a different immediate command list was last
// used on the same UR Queue.
if (CurQueue->Device->useDriverInOrderLists() && CurQueue->isInOrderQueue() &&
CurQueue->UsingImmCmdLists) {
auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine);
uint32_t QueueGroupOrdinal, QueueIndex;
auto NextIndex = QueueGroup.getQueueIndex(&QueueGroupOrdinal, &QueueIndex,
/*QueryOnly */ true);
auto NextImmCmdList = QueueGroup.ImmCmdLists[NextIndex];
IncludeLastCommandEvent &=
CurQueue->LastUsedCommandList != CurQueue->CommandListMap.end() &&
NextImmCmdList != CurQueue->LastUsedCommandList;
}

try {
uint32_t TmpListLength = 0;

Expand All @@ -1205,6 +1226,16 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
this->UrEventList = new ur_event_handle_t[EventListLength];
}

// For in-order queue and wait-list which is empty or has events only from
// the same queue then we don't need to wait on any other additional events
if (CurQueue->Device->useDriverInOrderLists() &&
CurQueue->isInOrderQueue() &&
WaitListEmptyOrAllEventsFromSameQueue(CurQueue, EventListLength,
EventList)) {
this->Length = TmpListLength;
return UR_RESULT_SUCCESS;
}

if (EventListLength > 0) {
for (uint32_t I = 0; I < EventListLength; I++) {
{
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
// the code can do a urKernelRelease on this kernel.
(*Event)->CommandData = (void *)Kernel;

// Increment the reference count of the Kernel and indicate that the Kernel is
// in use. Once the event has been signalled, the code in
// Increment the reference count of the Kernel and indicate that the Kernel
// is in use. Once the event has been signalled, the code in
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
// reference count on the kernel, using the kernel saved in CommandData.
UR_CALL(urKernelRetain(Kernel));
Expand Down
10 changes: 9 additions & 1 deletion source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1870,6 +1870,10 @@ ur_result_t ur_queue_handle_t_::createCommandList(
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;

if (Device->useDriverInOrderLists() && isInOrderQueue()) {
ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
}

ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
&ZeCommandListDesc, &ZeCommandList));

Expand Down Expand Up @@ -1985,7 +1989,11 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() {

// Evaluate performance of explicit usage for "0" index.
if (QueueIndex != 0) {
ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY;
ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY;
}

if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) {
ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER;
}

// Check if context's command list cache has an immediate command list with
Expand Down