Skip to content

Commit

Permalink
Merge pull request #1218 from Bensuo/maxime/imm-cmd-list-support
Browse files Browse the repository at this point in the history
[EXP][CMDBUF] L0 Immediate command-list support
  • Loading branch information
aarongreig authored Feb 6, 2024
2 parents 5b89ee8 + c8e150c commit 1cd402e
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 49 deletions.
70 changes: 28 additions & 42 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
ZeStruct<ze_command_list_desc_t> ZeDesc,
const ur_exp_command_buffer_desc_t *Desc)
: Context(Context), Device(Device), ZeCommandList(CommandList),
ZeCommandListDesc(ZeDesc), QueueProperties(), SyncPoints(),
NextSyncPoint(0), CommandListMap() {
ZeCommandListDesc(ZeDesc), ZeFencesList(), QueueProperties(),
SyncPoints(), NextSyncPoint(0) {
(void)Desc;
urContextRetain(Context);
urDeviceRetain(Device);
Expand Down Expand Up @@ -132,10 +132,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
}

// Release Fences allocated to command_buffer
for (auto it = CommandListMap.begin(); it != CommandListMap.end(); ++it) {
if (it->second.ZeFence != nullptr) {
ZE_CALL_NOCHECK(zeFenceDestroy, (it->second.ZeFence));
}
for (auto &ZeFence : ZeFencesList) {
ZE_CALL_NOCHECK(zeFenceDestroy, (ZeFence));
}
}

Expand Down Expand Up @@ -464,7 +462,6 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
ZE2UR_CALL(
zeCommandListAppendBarrier,
(ZeCommandList, nullptr, 1, &RetCommandBuffer->WaitEvent->ZeEvent));

return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -856,12 +853,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
ur_event_handle_t *Event) {
// There are issues with immediate command lists so return an error if the
// queue is in that mode.
if (Queue->UsingImmCmdLists) {
return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
}

std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
// Use compute engine rather than copy engine
const auto UseCopyEngine = false;
Expand All @@ -871,22 +862,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(

ze_fence_handle_t ZeFence;
ZeStruct<ze_fence_desc_t> ZeFenceDesc;
ur_command_list_ptr_t CommandListPtr;

ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
// TODO: Refactor so requiring a map iterator is not required here, currently
// required for executeCommandList though.
ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
ZeQueueDesc.ordinal = QueueGroupOrdinal;
CommandListPtr = CommandBuffer->CommandListMap.insert(
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
CommandBuffer->ZeCommandList,
{ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));

// Previous execution will have closed the command list, we need to reopen
// it otherwise calling `executeCommandList` will return early.
CommandListPtr->second.IsClosed = false;
CommandListPtr->second.ZeFenceInUse = true;
CommandBuffer->ZeFencesList.push_back(ZeFence);

// Create command-list to execute before `CommandListPtr` and will signal
// when `EventWaitList` dependencies are complete.
Expand All @@ -908,6 +886,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
(WaitCommandList->first, ZeEvent));
}

bool MustSignalWaitEvent = true;
if (NumEventsInWaitList) {
_ur_ze_event_list_t TmpWaitList;
UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
Expand All @@ -920,17 +899,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
else
CommandBuffer->WaitEvent->WaitList.insert(TmpWaitList);

ZE2UR_CALL(zeCommandListAppendBarrier,
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
CommandBuffer->WaitEvent->WaitList.Length,
CommandBuffer->WaitEvent->WaitList.ZeEventList));
} else {
ZE2UR_CALL(zeCommandListAppendSignalEvent,
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent));
if (!CommandBuffer->WaitEvent->WaitList.isEmpty()) {
ZE2UR_CALL(zeCommandListAppendBarrier,
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
CommandBuffer->WaitEvent->WaitList.Length,
CommandBuffer->WaitEvent->WaitList.ZeEventList));
Queue->executeCommandList(WaitCommandList, false, false);
MustSignalWaitEvent = false;
}
}

if (MustSignalWaitEvent) {
ZE2UR_CALL(zeEventHostSignal, (CommandBuffer->WaitEvent->ZeEvent));
}

// Submit main command-list. This command-list is of a batch command-list
// type, regardless of the UR Queue type. We therefore need to submit the list
// directly using the Level-Zero API to avoid type mismatches if using UR
// functions.
ZE2UR_CALL(zeCommandQueueExecuteCommandLists,
(ZeCommandQueue, 1, &CommandBuffer->ZeCommandList, ZeFence));

// Execution event for this enqueue of the UR command-buffer
ur_event_handle_t RetEvent{};

// Create a command-list to signal RetEvent on completion
ur_command_list_ptr_t SignalCommandList{};
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
Expand All @@ -943,7 +935,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
if (Event) {
UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent,
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
SignalCommandList, false));
SignalCommandList, false, true));

if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
// Multiple submissions of a command buffer implies that we need to save
Expand Down Expand Up @@ -972,13 +964,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
}
}

// Execution our command-lists asynchronously
// TODO Look using a single `zeCommandQueueExecuteCommandLists()` call
// passing all three command-lists, rather than individual calls which
// introduces latency.
UR_CALL(Queue->executeCommandList(WaitCommandList, false, false));
UR_CALL(Queue->executeCommandList(CommandListPtr, false, false));
UR_CALL(Queue->executeCommandList(SignalCommandList, false, false));
Queue->executeCommandList(SignalCommandList, false, false);

if (Event) {
*Event = RetEvent;
Expand Down
11 changes: 4 additions & 7 deletions source/adapters/level_zero/command_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
ze_command_list_handle_t ZeCommandList;
// Level Zero command list descriptor
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
// List of Level Zero fences created when submitting a graph.
// This list is needed to release all fences retained by the
// command_buffer.
std::vector<ze_fence_handle_t> ZeFencesList;
// Queue properties from command-buffer descriptor
// TODO: Do we need these?
ur_queue_properties_t QueueProperties;
Expand All @@ -60,13 +64,6 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
// Next sync_point value (may need to consider ways to reuse values if 32-bits
// is not enough)
ur_exp_command_buffer_sync_point_t NextSyncPoint;
// Command list map so we can use queue::executeCommandList.
// Command list map is also used to release all the Fences retained by the
// command_buffer std::unordered_multimap<ze_command_list_handle_t,
// ur_command_list_info_t> CommandListMap; CommandListMap is redefined as a
// multimap to enable mutiple commands enqueing into the same command_buffer
std::unordered_multimap<ze_command_list_handle_t, ur_command_list_info_t>
CommandListMap;
// Event which will signals the most recent execution of the command-buffer
// has finished
ur_event_handle_t SignalEvent = nullptr;
Expand Down

0 comments on commit 1cd402e

Please sign in to comment.