// Patch summary: EventCaches / ZeEventPoolCache change from vectors of lists
// to vectors of pointers to heap-allocated lists. The initial global lists are
// created on first use in the getters / addEventToQueueCache, and the lists
// are deleted in ur_context_handle_t_::finalize() and urQueueReleaseInternal().
// NOTE(review): the element types of the std::vector / std::list declarations
// are not visible in this copy (e.g. `std::vector>`) - confirm against the
// upstream patch before applying.
// NOTE(review): the early `return ze2urResult(ZeResult)` paths in finalize()
// exit before the remaining `delete`s run - confirm that is acceptable.
diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index c4b5423adb..a094ace868 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -395,27 +395,29 @@ ur_result_t ur_context_handle_t_::finalize() { if (!DisableEventsCaching) { std::scoped_lock Lock(EventCacheMutex); - for (auto &EventCache : EventCaches) { - for (auto &Event : EventCache) { + for (auto EventCache : EventCaches) { + for (auto Event : *EventCache) { auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent)); // Gracefully handle the case that L0 was already unloaded. if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED) return ze2urResult(ZeResult); delete Event; } - EventCache.clear(); + EventCache->clear(); + delete EventCache; } } { std::scoped_lock Lock(ZeEventPoolCacheMutex); - for (auto &ZePoolCache : ZeEventPoolCache) { - for (auto &ZePool : ZePoolCache) { + for (auto ZePoolCache : ZeEventPoolCache) { + for (auto ZePool : *ZePoolCache) { auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool)); // Gracefully handle the case that L0 was already unloaded. if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED) return ze2urResult(ZeResult); } - ZePoolCache.clear(); + ZePoolCache->clear(); + delete ZePoolCache; } } diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 2c80ff0e33..09605c0643 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -141,7 +141,7 @@ struct ur_context_handle_t_ : _ur_object { // head. // // Cache of event pools to which host-visible events are added to. - std::vector> ZeEventPoolCache{4}; + std::vector *> ZeEventPoolCache; std::vector *>> ZeEventPoolCacheDeviceMap{4}; @@ -165,7 +165,7 @@ struct ur_context_handle_t_ : _ur_object { ur_mutex EventCacheMutex; // Caches for events. 
- std::vector> EventCaches{4}; + std::vector *> EventCaches; std::vector< std::unordered_map *>> EventCachesDeviceMap{4}; @@ -207,18 +207,29 @@ struct ur_context_handle_t_ : _ur_object { auto getZeEventPoolCache(bool HostVisible, bool WithProfiling, ze_device_handle_t ZeDevice) { + // Lazily create the 4 global caches. [0]/[1]: host-visible with/without + // profiling; [2]/[3]: non-host-visible with/without profiling. + if (ZeEventPoolCache.empty()) { + for (int i = 0; i < 4; i++) { + std::list *deviceZeEventPoolCache = + new std::list; + ZeEventPoolCache.push_back(deviceZeEventPoolCache); + } + } if (HostVisible) { if (ZeDevice) { auto ZeEventPoolCacheMap = WithProfiling ? &ZeEventPoolCacheDeviceMap[0] : &ZeEventPoolCacheDeviceMap[1]; if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) { - ZeEventPoolCache.emplace_back(); - (*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back(); + std::list *deviceZeEventPoolCache = + new std::list; + ZeEventPoolCache.push_back(deviceZeEventPoolCache); + (*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache; } return (*ZeEventPoolCacheMap)[ZeDevice]; } else { - return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1]; + return WithProfiling ? ZeEventPoolCache[0] : ZeEventPoolCache[1]; } } else { if (ZeDevice) { @@ -226,12 +237,14 @@ struct ur_context_handle_t_ : _ur_object { ? &ZeEventPoolCacheDeviceMap[2] : &ZeEventPoolCacheDeviceMap[3]; if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) { - ZeEventPoolCache.emplace_back(); - (*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back(); + std::list *deviceZeEventPoolCache = + new std::list; + ZeEventPoolCache.push_back(deviceZeEventPoolCache); + (*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache; } return (*ZeEventPoolCacheMap)[ZeDevice]; } else { - return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3]; + return WithProfiling ? 
ZeEventPoolCache[2] : ZeEventPoolCache[3]; } } } @@ -274,29 +287,42 @@ struct ur_context_handle_t_ : _ur_object { // Get the cache of events for a provided scope and profiling mode. auto getEventCache(bool HostVisible, bool WithProfiling, ur_device_handle_t Device) { + // Lazily create the 4 global caches. [0]/[1]: host-visible with/without + // profiling; [2]/[3]: non-host-visible with/without profiling. + if (EventCaches.empty()) { + for (int i = 0; i < 4; i++) { + std::list *deviceEventCache = + new std::list; + EventCaches.push_back(deviceEventCache); + } + } if (HostVisible) { if (Device) { auto EventCachesMap = WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { - EventCaches.emplace_back(); - (*EventCachesMap)[Device] = &EventCaches.back(); + std::list *deviceEventCache = + new std::list; + EventCaches.push_back(deviceEventCache); + (*EventCachesMap)[Device] = deviceEventCache; } return (*EventCachesMap)[Device]; } else { - return WithProfiling ? &EventCaches[0] : &EventCaches[1]; + return WithProfiling ? EventCaches[0] : EventCaches[1]; } } else { if (Device) { auto EventCachesMap = WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { - EventCaches.emplace_back(); - (*EventCachesMap)[Device] = &EventCaches.back(); + std::list *deviceEventCache = + new std::list; + EventCaches.push_back(deviceEventCache); + (*EventCachesMap)[Device] = deviceEventCache; } return (*EventCachesMap)[Device]; } else { - return WithProfiling ? &EventCaches[2] : &EventCaches[3]; + return WithProfiling ? 
EventCaches[2] : EventCaches[3]; } } } diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 241e3a23a2..8a9f36a432 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1267,17 +1267,28 @@ ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) { } ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) { + // Lazily create the 2 global caches on first use: + // [0] for host-visible events, [1] for non-host-visible events. + if (EventCaches.empty()) { + for (int i = 0; i < 2; i++) { + std::list *deviceEventCache = + new std::list; + EventCaches.push_back(deviceEventCache); + } + } if (!Event->IsMultiDevice && Event->UrQueue) { auto Device = Event->UrQueue->Device; auto EventCachesMap = Event->isHostVisible() ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1]; if (EventCachesMap->find(Device) == EventCachesMap->end()) { - EventCaches.emplace_back(); - (*EventCachesMap)[Device] = &EventCaches.back(); + std::list *deviceEventCache = + new std::list; + EventCaches.push_back(deviceEventCache); + (*EventCachesMap)[Device] = deviceEventCache; } (*EventCachesMap)[Device]->emplace_back(Event); } else { - auto Cache = Event->isHostVisible() ? &EventCaches[0] : &EventCaches[1]; + auto Cache = Event->isHostVisible() ? 
EventCaches[0] : EventCaches[1]; Cache->emplace_back(Event); } return UR_RESULT_SUCCESS; @@ -1301,9 +1312,13 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) { if (!UrQueue->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; - for (auto &Cache : UrQueue->EventCaches) - for (auto &Event : Cache) + for (auto Cache : UrQueue->EventCaches) { + for (auto Event : *Cache) { UR_CALL(urEventReleaseInternal(Event)); + } + Cache->clear(); + delete Cache; + } if (UrQueue->OwnZeCommandQueue) { for (auto &QueueMap : @@ -1460,6 +1475,16 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice, bool HostVisible) { std::list *Cache; + // Lazily create the 2 global caches on first use: + // [0] for host-visible events, [1] for non-host-visible events. + if (EventCaches.empty()) { + for (int i = 0; i < 2; i++) { + std::list *deviceEventCache = + new std::list; + EventCaches.push_back(deviceEventCache); + } + } + if (!IsMultiDevice) { auto Device = this->Device; Cache = HostVisible ? EventCachesDeviceMap[0][Device] @@ -1468,7 +1493,7 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice, return nullptr; } } else { - Cache = HostVisible ? &EventCaches[0] : &EventCaches[1]; + Cache = HostVisible ? EventCaches[0] : EventCaches[1]; } // If we don't have any events, return nullptr. diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index b255e5963e..b437a87fd9 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -342,7 +342,7 @@ struct ur_queue_handle_t_ : _ur_object { // requested type of event. Each list contains events which can be reused // inside all command lists in the queue as described in the 2-event model. // Leftover events in the cache are relased at the queue destruction. - std::vector> EventCaches{2}; + std::vector *> EventCaches; std::vector< std::unordered_map *>> EventCachesDeviceMap{2};