Skip to content

Commit

Permalink
Merge pull request #1366 from nrspruit/fix_multidevice_event_cache
Browse files Browse the repository at this point in the history
[L0] Fix the multi device event cache to allocate lists as pointers
  • Loading branch information
kbenzie committed Feb 22, 2024
2 parents 4814e71 + 123c00f commit b4150ad
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 27 deletions.
14 changes: 8 additions & 6 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,27 +395,29 @@ ur_result_t ur_context_handle_t_::finalize() {

if (!DisableEventsCaching) {
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
for (auto &EventCache : EventCaches) {
for (auto &Event : EventCache) {
for (auto EventCache : EventCaches) {
for (auto Event : *EventCache) {
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
// Gracefully handle the case that L0 was already unloaded.
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
return ze2urResult(ZeResult);
delete Event;
}
EventCache.clear();
EventCache->clear();
delete EventCache;
}
}
{
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
for (auto &ZePoolCache : ZeEventPoolCache) {
for (auto &ZePool : ZePoolCache) {
for (auto ZePoolCache : ZeEventPoolCache) {
for (auto ZePool : *ZePoolCache) {
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
// Gracefully handle the case that L0 was already unloaded.
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
return ze2urResult(ZeResult);
}
ZePoolCache.clear();
ZePoolCache->clear();
delete ZePoolCache;
}
}

Expand Down
54 changes: 40 additions & 14 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ struct ur_context_handle_t_ : _ur_object {
// head.
//
// Cache of event pools to which host-visible events are added.
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
std::vector<std::list<ze_event_pool_handle_t> *> ZeEventPoolCache;
std::vector<std::unordered_map<ze_device_handle_t,
std::list<ze_event_pool_handle_t> *>>
ZeEventPoolCacheDeviceMap{4};
Expand All @@ -165,7 +165,7 @@ struct ur_context_handle_t_ : _ur_object {
ur_mutex EventCacheMutex;

// Caches for events.
std::vector<std::list<ur_event_handle_t>> EventCaches{4};
std::vector<std::list<ur_event_handle_t> *> EventCaches;
std::vector<
std::unordered_map<ur_device_handle_t, std::list<ur_event_handle_t> *>>
EventCachesDeviceMap{4};
Expand Down Expand Up @@ -207,31 +207,44 @@ struct ur_context_handle_t_ : _ur_object {

auto getZeEventPoolCache(bool HostVisible, bool WithProfiling,
ze_device_handle_t ZeDevice) {
// Lazily create the 4 initial global caches, one per combination of scope
// (host-visible or device) and profiling mode (with or without profiling).
if (ZeEventPoolCache.empty()) {
for (int i = 0; i < 4; i++) {
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
new std::list<ze_event_pool_handle_t>;
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
}
}
if (HostVisible) {
if (ZeDevice) {
auto ZeEventPoolCacheMap = WithProfiling
? &ZeEventPoolCacheDeviceMap[0]
: &ZeEventPoolCacheDeviceMap[1];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCache.emplace_back();
(*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back();
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
new std::list<ze_event_pool_handle_t>;
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
(*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache;
}
return (*ZeEventPoolCacheMap)[ZeDevice];
} else {
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
return WithProfiling ? ZeEventPoolCache[0] : ZeEventPoolCache[1];
}
} else {
if (ZeDevice) {
auto ZeEventPoolCacheMap = WithProfiling
? &ZeEventPoolCacheDeviceMap[2]
: &ZeEventPoolCacheDeviceMap[3];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCache.emplace_back();
(*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back();
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
new std::list<ze_event_pool_handle_t>;
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
(*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache;
}
return (*ZeEventPoolCacheMap)[ZeDevice];
} else {
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
return WithProfiling ? ZeEventPoolCache[2] : ZeEventPoolCache[3];
}
}
}
Expand Down Expand Up @@ -274,29 +287,42 @@ struct ur_context_handle_t_ : _ur_object {
// Get the cache of events for a provided scope and profiling mode.
auto getEventCache(bool HostVisible, bool WithProfiling,
ur_device_handle_t Device) {
// Lazily create the 4 initial global caches, one per combination of scope
// (host-visible or device) and profiling mode (with or without profiling).
if (EventCaches.empty()) {
for (int i = 0; i < 4; i++) {
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
}
}
if (HostVisible) {
if (Device) {
auto EventCachesMap =
WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1];
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
EventCaches.emplace_back();
(*EventCachesMap)[Device] = &EventCaches.back();
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
(*EventCachesMap)[Device] = deviceEventCache;
}
return (*EventCachesMap)[Device];
} else {
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
return WithProfiling ? EventCaches[0] : EventCaches[1];
}
} else {
if (Device) {
auto EventCachesMap =
WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3];
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
EventCaches.emplace_back();
(*EventCachesMap)[Device] = &EventCaches.back();
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
(*EventCachesMap)[Device] = deviceEventCache;
}
return (*EventCachesMap)[Device];
} else {
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
return WithProfiling ? EventCaches[2] : EventCaches[3];
}
}
}
Expand Down
37 changes: 31 additions & 6 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1267,17 +1267,28 @@ ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) {
}

ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) {
// Lazily create the 2 initial global caches, one per scope:
// host-visible (index 0) and device (index 1).
if (EventCaches.empty()) {
for (int i = 0; i < 2; i++) {
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
}
}
if (!Event->IsMultiDevice && Event->UrQueue) {
auto Device = Event->UrQueue->Device;
auto EventCachesMap = Event->isHostVisible() ? &EventCachesDeviceMap[0]
: &EventCachesDeviceMap[1];
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
EventCaches.emplace_back();
(*EventCachesMap)[Device] = &EventCaches.back();
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
(*EventCachesMap)[Device] = deviceEventCache;
}
(*EventCachesMap)[Device]->emplace_back(Event);
} else {
auto Cache = Event->isHostVisible() ? &EventCaches[0] : &EventCaches[1];
auto Cache = Event->isHostVisible() ? EventCaches[0] : EventCaches[1];
Cache->emplace_back(Event);
}
return UR_RESULT_SUCCESS;
Expand All @@ -1301,9 +1312,13 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
if (!UrQueue->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;

for (auto &Cache : UrQueue->EventCaches)
for (auto &Event : Cache)
for (auto Cache : UrQueue->EventCaches) {
for (auto Event : *Cache) {
UR_CALL(urEventReleaseInternal(Event));
}
Cache->clear();
delete Cache;
}

if (UrQueue->OwnZeCommandQueue) {
for (auto &QueueMap :
Expand Down Expand Up @@ -1460,6 +1475,16 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice,
bool HostVisible) {
std::list<ur_event_handle_t> *Cache;

// Lazily create the 2 initial global caches, one per scope:
// host-visible (index 0) and device (index 1).
if (EventCaches.empty()) {
for (int i = 0; i < 2; i++) {
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
}
}

if (!IsMultiDevice) {
auto Device = this->Device;
Cache = HostVisible ? EventCachesDeviceMap[0][Device]
Expand All @@ -1468,7 +1493,7 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice,
return nullptr;
}
} else {
Cache = HostVisible ? &EventCaches[0] : &EventCaches[1];
Cache = HostVisible ? EventCaches[0] : EventCaches[1];
}

// If we don't have any events, return nullptr.
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/level_zero/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ struct ur_queue_handle_t_ : _ur_object {
// requested type of event. Each list contains events which can be reused
// inside all command lists in the queue as described in the 2-event model.
// Leftover events in the cache are released at the queue destruction.
std::vector<std::list<ur_event_handle_t>> EventCaches{2};
std::vector<std::list<ur_event_handle_t> *> EventCaches;
std::vector<
std::unordered_map<ur_device_handle_t, std::list<ur_event_handle_t> *>>
EventCachesDeviceMap{2};
Expand Down

0 comments on commit b4150ad

Please sign in to comment.