From 2aa10ddf6aeaf07ce6b498ff000e7e3aed1b3146 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 28 Feb 2024 13:49:23 +0000 Subject: [PATCH 1/2] Revert "[L0] Fix the multi device event cache to allocate lists as pointers" This reverts commit 123c00f129e19db34c987990f13c336e5bef2db1. --- source/adapters/level_zero/context.cpp | 14 +++---- source/adapters/level_zero/context.hpp | 54 +++++++------------------- source/adapters/level_zero/queue.cpp | 37 +++--------------- source/adapters/level_zero/queue.hpp | 2 +- 4 files changed, 27 insertions(+), 80 deletions(-) diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index a094ace868..c4b5423adb 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -395,29 +395,27 @@ ur_result_t ur_context_handle_t_::finalize() { if (!DisableEventsCaching) { std::scoped_lock Lock(EventCacheMutex); - for (auto EventCache : EventCaches) { - for (auto Event : *EventCache) { + for (auto &EventCache : EventCaches) { + for (auto &Event : EventCache) { auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent)); // Gracefully handle the case that L0 was already unloaded. if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED) return ze2urResult(ZeResult); delete Event; } - EventCache->clear(); - delete EventCache; + EventCache.clear(); } } { std::scoped_lock Lock(ZeEventPoolCacheMutex); - for (auto ZePoolCache : ZeEventPoolCache) { - for (auto ZePool : *ZePoolCache) { + for (auto &ZePoolCache : ZeEventPoolCache) { + for (auto &ZePool : ZePoolCache) { auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool)); // Gracefully handle the case that L0 was already unloaded. if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED) return ze2urResult(ZeResult); } - ZePoolCache->clear(); - delete ZePoolCache; + ZePoolCache.clear(); } } diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 09605c0643..2c80ff0e33 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -141,7 +141,7 @@ struct ur_context_handle_t_ : _ur_object { // head. // // Cache of event pools to which host-visible events are added to. - std::vector *> ZeEventPoolCache; + std::vector> ZeEventPoolCache{4}; std::vector *>> ZeEventPoolCacheDeviceMap{4}; @@ -165,7 +165,7 @@ struct ur_context_handle_t_ : _ur_object { ur_mutex EventCacheMutex; // Caches for events. - std::vector *> EventCaches; + std::vector> EventCaches{4}; std::vector< std::unordered_map *>> EventCachesDeviceMap{4}; @@ -207,29 +207,18 @@ struct ur_context_handle_t_ : _ur_object { auto getZeEventPoolCache(bool HostVisible, bool WithProfiling, ze_device_handle_t ZeDevice) { - // Adding 4 initial global caches for provided scope and profiling modes: - // Host Scope, Device Scope, with Profiling, without Profiling. - if (ZeEventPoolCache.empty()) { - for (int i = 0; i < 4; i++) { - std::list *deviceZeEventPoolCache = - new std::list; - ZeEventPoolCache.push_back(deviceZeEventPoolCache); - } - } if (HostVisible) { if (ZeDevice) { auto ZeEventPoolCacheMap = WithProfiling ? &ZeEventPoolCacheDeviceMap[0] : &ZeEventPoolCacheDeviceMap[1]; if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) { - std::list *deviceZeEventPoolCache = - new std::list; - ZeEventPoolCache.push_back(deviceZeEventPoolCache); - (*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache; + ZeEventPoolCache.emplace_back(); + (*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back(); } return (*ZeEventPoolCacheMap)[ZeDevice]; } else { - return WithProfiling ? ZeEventPoolCache[0] : ZeEventPoolCache[1]; + return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1]; } } else { if (ZeDevice) { @@ -237,14 +226,12 @@ struct ur_context_handle_t_ : _ur_object { ? &ZeEventPoolCacheDeviceMap[2] : &ZeEventPoolCacheDeviceMap[3]; if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) { - std::list *deviceZeEventPoolCache = - new std::list; - ZeEventPoolCache.push_back(deviceZeEventPoolCache); - (*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache; + ZeEventPoolCache.emplace_back(); + (*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back(); } return (*ZeEventPoolCacheMap)[ZeDevice]; } else { - return WithProfiling ? ZeEventPoolCache[2] : ZeEventPoolCache[3]; + return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3]; } } } @@ -287,42 +274,29 @@ struct ur_context_handle_t_ : _ur_object { // Get the cache of events for a provided scope and profiling mode. auto getEventCache(bool HostVisible, bool WithProfiling, ur_device_handle_t Device) { - // Adding 4 initial global caches for provided scope and profiling modes: - // Host Scope, Device Scope, with Profiling, without Profiling. - if (EventCaches.empty()) { - for (int i = 0; i < 4; i++) { - std::list *deviceEventCache = - new std::list; - EventCaches.push_back(deviceEventCache); - } - } if (HostVisible) { if (Device) { auto EventCachesMap = WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { - std::list *deviceEventCache = - new std::list; - EventCaches.push_back(deviceEventCache); - (*EventCachesMap)[Device] = deviceEventCache; + EventCaches.emplace_back(); + (*EventCachesMap)[Device] = &EventCaches.back(); } return (*EventCachesMap)[Device]; } else { - return WithProfiling ? EventCaches[0] : EventCaches[1]; + return WithProfiling ? &EventCaches[0] : &EventCaches[1]; } } else { if (Device) { auto EventCachesMap = WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { - std::list *deviceEventCache = - new std::list; - EventCaches.push_back(deviceEventCache); - (*EventCachesMap)[Device] = deviceEventCache; + EventCaches.emplace_back(); + (*EventCachesMap)[Device] = &EventCaches.back(); } return (*EventCachesMap)[Device]; } else { - return WithProfiling ? EventCaches[2] : EventCaches[3]; + return WithProfiling ? &EventCaches[2] : &EventCaches[3]; } } } diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 8a9f36a432..241e3a23a2 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1267,28 +1267,17 @@ ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) { } ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) { - // Adding 2 initial global caches for provided scope: - // Host Scope, Device Scope. - if (EventCaches.empty()) { - for (int i = 0; i < 2; i++) { - std::list *deviceEventCache = - new std::list; - EventCaches.push_back(deviceEventCache); - } - } if (!Event->IsMultiDevice && Event->UrQueue) { auto Device = Event->UrQueue->Device; auto EventCachesMap = Event->isHostVisible() ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { - std::list *deviceEventCache = - new std::list; - EventCaches.push_back(deviceEventCache); - (*EventCachesMap)[Device] = deviceEventCache; + EventCaches.emplace_back(); + (*EventCachesMap)[Device] = &EventCaches.back(); } (*EventCachesMap)[Device]->emplace_back(Event); } else { - auto Cache = Event->isHostVisible() ? EventCaches[0] : EventCaches[1]; + auto Cache = Event->isHostVisible() ? &EventCaches[0] : &EventCaches[1]; Cache->emplace_back(Event); } return UR_RESULT_SUCCESS; @@ -1312,13 +1301,9 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) { if (!UrQueue->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; - for (auto Cache : UrQueue->EventCaches) { - for (auto Event : *Cache) { + for (auto &Cache : UrQueue->EventCaches) + for (auto &Event : Cache) UR_CALL(urEventReleaseInternal(Event)); - } - Cache->clear(); - delete Cache; - } if (UrQueue->OwnZeCommandQueue) { for (auto &QueueMap : @@ -1475,16 +1460,6 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice, bool HostVisible) { std::list *Cache; - // Adding 2 initial global caches for provided scope: - // Host Scope, Device Scope. - if (EventCaches.empty()) { - for (int i = 0; i < 2; i++) { - std::list *deviceEventCache = - new std::list; - EventCaches.push_back(deviceEventCache); - } - } - if (!IsMultiDevice) { auto Device = this->Device; Cache = HostVisible ? EventCachesDeviceMap[0][Device] @@ -1493,7 +1468,7 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice, return nullptr; } } else { - Cache = HostVisible ? EventCaches[0] : EventCaches[1]; + Cache = HostVisible ? &EventCaches[0] : &EventCaches[1]; } // If we don't have any events, return nullptr. diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index b437a87fd9..b255e5963e 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -342,7 +342,7 @@ struct ur_queue_handle_t_ : _ur_object { // requested type of event. Each list contains events which can be reused // inside all command lists in the queue as described in the 2-event model. // Leftover events in the cache are relased at the queue destruction. - std::vector *> EventCaches; + std::vector> EventCaches{2}; std::vector< std::unordered_map *>> EventCachesDeviceMap{2}; From 8a5d6d397cf036b832ff9e909dbfb5d269e57212 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 28 Feb 2024 14:33:26 +0000 Subject: [PATCH 2/2] [L0] avoid using pointers into event cache related vectors --- source/adapters/level_zero/context.hpp | 34 ++++++++++++++------------ source/adapters/level_zero/queue.cpp | 12 +++++---- source/adapters/level_zero/queue.hpp | 3 +-- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 2c80ff0e33..8cb1d5369f 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -142,8 +142,7 @@ struct ur_context_handle_t_ : _ur_object { // // Cache of event pools to which host-visible events are added to. std::vector> ZeEventPoolCache{4}; - std::vector *>> + std::vector> ZeEventPoolCacheDeviceMap{4}; // This map will be used to determine if a pool is full or not @@ -165,9 +164,9 @@ struct ur_context_handle_t_ : _ur_object { ur_mutex EventCacheMutex; // Caches for events. - std::vector> EventCaches{4}; - std::vector< - std::unordered_map *>> + using EventCache = std::vector>; + EventCache EventCaches{4}; + std::vector> EventCachesDeviceMap{4}; // Initialize the PI context. @@ -205,8 +204,9 @@ struct ur_context_handle_t_ : _ur_object { // Add ur_event_handle_t to cache. void addEventToContextCache(ur_event_handle_t); - auto getZeEventPoolCache(bool HostVisible, bool WithProfiling, - ze_device_handle_t ZeDevice) { + std::list * + getZeEventPoolCache(bool HostVisible, bool WithProfiling, + ze_device_handle_t ZeDevice) { if (HostVisible) { if (ZeDevice) { auto ZeEventPoolCacheMap = WithProfiling @@ -214,9 +214,10 @@ struct ur_context_handle_t_ : _ur_object { : &ZeEventPoolCacheDeviceMap[1]; if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) { ZeEventPoolCache.emplace_back(); - (*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back(); + ZeEventPoolCacheMap->insert( + std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1)); } - return (*ZeEventPoolCacheMap)[ZeDevice]; + return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]]; } else { return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1]; } @@ -227,9 +228,10 @@ struct ur_context_handle_t_ : _ur_object { : &ZeEventPoolCacheDeviceMap[3]; if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) { ZeEventPoolCache.emplace_back(); - (*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back(); + ZeEventPoolCacheMap->insert( + std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1)); } - return (*ZeEventPoolCacheMap)[ZeDevice]; + return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]]; } else { return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3]; } @@ -280,9 +282,10 @@ struct ur_context_handle_t_ : _ur_object { WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { EventCaches.emplace_back(); - (*EventCachesMap)[Device] = &EventCaches.back(); + EventCachesMap->insert( + std::make_pair(Device, EventCaches.size() - 1)); } - return (*EventCachesMap)[Device]; + return &EventCaches[(*EventCachesMap)[Device]]; } else { return WithProfiling ? &EventCaches[0] : &EventCaches[1]; } @@ -292,9 +295,10 @@ struct ur_context_handle_t_ : _ur_object { WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { EventCaches.emplace_back(); - (*EventCachesMap)[Device] = &EventCaches.back(); + EventCachesMap->insert( + std::make_pair(Device, EventCaches.size() - 1)); } - return (*EventCachesMap)[Device]; + return &EventCaches[(*EventCachesMap)[Device]]; } else { return WithProfiling ? &EventCaches[2] : &EventCaches[3]; } diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 241e3a23a2..2009c3c6f5 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1273,9 +1273,9 @@ ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) { : &EventCachesDeviceMap[1]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { EventCaches.emplace_back(); - (*EventCachesMap)[Device] = &EventCaches.back(); + EventCachesMap->insert(std::make_pair(Device, EventCaches.size() - 1)); } - (*EventCachesMap)[Device]->emplace_back(Event); + EventCaches[EventCachesMap->at(Device)].emplace_back(Event); } else { auto Cache = Event->isHostVisible() ? &EventCaches[0] : &EventCaches[1]; Cache->emplace_back(Event); @@ -1301,9 +1301,11 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) { if (!UrQueue->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; - for (auto &Cache : UrQueue->EventCaches) + for (auto &Cache : UrQueue->EventCaches) { for (auto &Event : Cache) UR_CALL(urEventReleaseInternal(Event)); + Cache.clear(); + } if (UrQueue->OwnZeCommandQueue) { for (auto &QueueMap : @@ -1462,8 +1464,8 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice, if (!IsMultiDevice) { auto Device = this->Device; - Cache = HostVisible ? EventCachesDeviceMap[0][Device] - : EventCachesDeviceMap[1][Device]; + Cache = HostVisible ? &EventCaches[EventCachesDeviceMap[0][Device]] + : &EventCaches[EventCachesDeviceMap[1][Device]]; if (!Cache) { return nullptr; } diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index b255e5963e..06751e03c1 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -343,8 +343,7 @@ struct ur_queue_handle_t_ : _ur_object { // inside all command lists in the queue as described in the 2-event model. // Leftover events in the cache are relased at the queue destruction. std::vector> EventCaches{2}; - std::vector< - std::unordered_map *>> + std::vector> EventCachesDeviceMap{2}; // adjust the queue's batch size, knowing that the current command list