Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Candidate for v0.8.13 release tag #1422

Merged
merged 3 commits into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
project(unified-runtime VERSION 0.8.12)
project(unified-runtime VERSION 0.8.13)

include(GNUInstallDirs)
include(CheckCXXSourceCompiles)
Expand Down
9 changes: 8 additions & 1 deletion source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2022 Intel Corporation
# Copyright (C) 2022-2024 Intel Corporation
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Expand Down Expand Up @@ -119,6 +119,13 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
)

# adapter_lib_init_linux.cpp is only added to the adapter target on
# non-Windows builds; Windows builds do not compile it.
if(NOT WIN32)
target_sources(ur_adapter_level_zero
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_linux.cpp
)
endif()

# TODO: fix level_zero adapter conversion warnings
target_compile_options(${TARGET_NAME} PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
Expand Down
72 changes: 53 additions & 19 deletions source/adapters/level_zero/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@
#include "adapter.hpp"
#include "ur_level_zero.hpp"

// Due to multiple DLLMain definitions with SYCL, the Global Adapter is
// initialized at variable creation.
#if defined(_WIN32)
ur_adapter_handle_t_ *GlobalAdapter = new ur_adapter_handle_t_();
#else
ur_adapter_handle_t_ *GlobalAdapter;
#endif

UR_APIEXPORT ur_result_t UR_APICALL
urInit(ur_device_init_flags_t
DeviceFlags, ///< [in] device initialization flags.
Expand Down Expand Up @@ -48,8 +56,7 @@ ur_result_t initPlatforms(PlatformVec &platforms) noexcept try {
ur_result_t adapterStateInit() { return UR_RESULT_SUCCESS; }

ur_adapter_handle_t_::ur_adapter_handle_t_() {

Adapter.PlatformCache.Compute = [](Result<PlatformVec> &result) {
PlatformCache.Compute = [](Result<PlatformVec> &result) {
static std::once_flag ZeCallCountInitialized;
try {
std::call_once(ZeCallCountInitialized, []() {
Expand All @@ -63,7 +70,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() {
}

// initialize level zero only once.
if (Adapter.ZeResult == std::nullopt) {
if (GlobalAdapter->ZeResult == std::nullopt) {
// Setting these environment variables before running zeInit will enable
// the validation layer in the Level Zero loader.
if (UrL0Debug & UR_L0_DEBUG_VALIDATION) {
Expand All @@ -82,20 +89,21 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() {
// We must only initialize the driver once, even if urPlatformGet() is
// called multiple times. Declaring the return value as "static" ensures
// it's only called once.
Adapter.ZeResult = ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));
GlobalAdapter->ZeResult =
ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));
}
assert(Adapter.ZeResult !=
assert(GlobalAdapter->ZeResult !=
std::nullopt); // verify that level-zero is initialized
PlatformVec platforms;

// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
if (*Adapter.ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
if (*GlobalAdapter->ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
result = std::move(platforms);
return;
}
if (*Adapter.ZeResult != ZE_RESULT_SUCCESS) {
if (*GlobalAdapter->ZeResult != ZE_RESULT_SUCCESS) {
urPrint("zeInit: Level Zero initialization failure\n");
result = ze2urResult(*Adapter.ZeResult);
result = ze2urResult(*GlobalAdapter->ZeResult);
return;
}

Expand All @@ -108,7 +116,11 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() {
};
}

ur_adapter_handle_t_ Adapter{};
// Releases the process-wide adapter handle, if one is still allocated.
//
// This function is registered with std::atexit from more than one call site
// (adapterStateTeardown on Windows and the on-demand path in urAdapterGet),
// so it can run several times at process exit. Resetting GlobalAdapter to
// nullptr after the delete makes repeated invocations harmless and prevents
// the `if (GlobalAdapter)` checks elsewhere from accepting a dangling pointer.
void globalAdapterOnDemandCleanup() {
  delete GlobalAdapter; // deleting a null pointer is a no-op
  GlobalAdapter = nullptr;
}

ur_result_t adapterStateTeardown() {
bool LeakFound = false;
Expand Down Expand Up @@ -195,6 +207,11 @@ ur_result_t adapterStateTeardown() {
}
if (LeakFound)
return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
// Due to multiple DLLMain definitions with SYCL, register to cleanup the
// Global Adapter after refcnt is 0
#if defined(_WIN32)
std::atexit(globalAdapterOnDemandCleanup);
#endif

return UR_RESULT_SUCCESS;
}
Expand All @@ -221,11 +238,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
///< adapters available.
) {
if (NumEntries > 0 && Adapters) {
std::lock_guard<std::mutex> Lock{Adapter.Mutex};
if (Adapter.RefCount++ == 0) {
adapterStateInit();
if (GlobalAdapter) {
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
if (GlobalAdapter->RefCount++ == 0) {
adapterStateInit();
}
} else {
// If the GetAdapter is called after the Library began or was torn down,
// then temporarily create a new Adapter handle and register a new
// cleanup.
GlobalAdapter = new ur_adapter_handle_t_();
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
if (GlobalAdapter->RefCount++ == 0) {
adapterStateInit();
}
std::atexit(globalAdapterOnDemandCleanup);
}
*Adapters = &Adapter;
*Adapters = GlobalAdapter;
}

if (NumAdapters) {
Expand All @@ -236,17 +265,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
}

UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
std::lock_guard<std::mutex> Lock{Adapter.Mutex};
if (--Adapter.RefCount == 0) {
return adapterStateTeardown();
// Check first if the Adapter pointer is valid
if (GlobalAdapter) {
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
if (--GlobalAdapter->RefCount == 0) {
return adapterStateTeardown();
}
}

return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
std::lock_guard<std::mutex> Lock{Adapter.Mutex};
Adapter.RefCount++;
if (GlobalAdapter) {
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
GlobalAdapter->RefCount++;
}

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -275,7 +309,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
case UR_ADAPTER_INFO_BACKEND:
return ReturnValue(UR_ADAPTER_BACKEND_LEVEL_ZERO);
case UR_ADAPTER_INFO_REFERENCE_COUNT:
return ReturnValue(Adapter.RefCount.load());
return ReturnValue(GlobalAdapter->RefCount.load());
default:
return UR_RESULT_ERROR_INVALID_ENUMERATION;
}
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/level_zero/adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ struct ur_adapter_handle_t_ {
ZeCache<Result<PlatformVec>> PlatformCache;
};

extern ur_adapter_handle_t_ Adapter;
extern ur_adapter_handle_t_ *GlobalAdapter;
25 changes: 25 additions & 0 deletions source/adapters/level_zero/adapter_lib_init_linux.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===--------- adapter_lib_init_linux.cpp - Level Zero Adapter ------------===//
//
// Copyright (C) 2023 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "adapter.hpp"
#include "ur_level_zero.hpp"

// Hook marked with the constructor attribute: allocates the global adapter
// handle unless one has already been created.
void __attribute__((constructor)) createAdapterHandle() {
  if (GlobalAdapter != nullptr) {
    return;
  }
  GlobalAdapter = new ur_adapter_handle_t_();
}

// Hook marked with the destructor attribute: frees the global adapter handle
// and clears the pointer so later null checks see that it is gone.
void __attribute__((destructor)) deleteAdapterHandle() {
  // `delete` on a null pointer does nothing, so no explicit guard is needed.
  delete GlobalAdapter;
  GlobalAdapter = nullptr;
}
14 changes: 8 additions & 6 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,27 +398,29 @@ ur_result_t ur_context_handle_t_::finalize() {

if (!DisableEventsCaching) {
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
for (auto &EventCache : EventCaches) {
for (auto &Event : EventCache) {
for (auto EventCache : EventCaches) {
for (auto Event : *EventCache) {
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
// Gracefully handle the case that L0 was already unloaded.
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
return ze2urResult(ZeResult);
delete Event;
}
EventCache.clear();
EventCache->clear();
delete EventCache;
}
}
{
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
for (auto &ZePoolCache : ZeEventPoolCache) {
for (auto &ZePool : ZePoolCache) {
for (auto ZePoolCache : ZeEventPoolCache) {
for (auto ZePool : *ZePoolCache) {
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
// Gracefully handle the case that L0 was already unloaded.
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
return ze2urResult(ZeResult);
}
ZePoolCache.clear();
ZePoolCache->clear();
delete ZePoolCache;
}
}

Expand Down
78 changes: 50 additions & 28 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,9 @@ struct ur_context_handle_t_ : _ur_object {
// head.
//
// Cache of event pools to which host-visible events are added to.
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
std::vector<std::unordered_map<ze_device_handle_t, size_t>>
std::vector<std::list<ze_event_pool_handle_t> *> ZeEventPoolCache;
std::vector<std::unordered_map<ze_device_handle_t,
std::list<ze_event_pool_handle_t> *>>
ZeEventPoolCacheDeviceMap{4};

// This map will be used to determine if a pool is full or not
Expand All @@ -164,9 +165,9 @@ struct ur_context_handle_t_ : _ur_object {
ur_mutex EventCacheMutex;

// Caches for events.
using EventCache = std::vector<std::list<ur_event_handle_t>>;
EventCache EventCaches{4};
std::vector<std::unordered_map<ur_device_handle_t, size_t>>
std::vector<std::list<ur_event_handle_t> *> EventCaches;
std::vector<
std::unordered_map<ur_device_handle_t, std::list<ur_event_handle_t> *>>
EventCachesDeviceMap{4};

// Initialize the PI context.
Expand Down Expand Up @@ -204,36 +205,46 @@ struct ur_context_handle_t_ : _ur_object {
// Add ur_event_handle_t to cache.
void addEventToContextCache(ur_event_handle_t);

std::list<ze_event_pool_handle_t> *
getZeEventPoolCache(bool HostVisible, bool WithProfiling,
ze_device_handle_t ZeDevice) {
auto getZeEventPoolCache(bool HostVisible, bool WithProfiling,
ze_device_handle_t ZeDevice) {
// Adding 4 initial global caches for provided scope and profiling modes:
// Host Scope, Device Scope, with Profiling, without Profiling.
if (ZeEventPoolCache.empty()) {
for (int i = 0; i < 4; i++) {
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
new std::list<ze_event_pool_handle_t>;
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
}
}
if (HostVisible) {
if (ZeDevice) {
auto ZeEventPoolCacheMap = WithProfiling
? &ZeEventPoolCacheDeviceMap[0]
: &ZeEventPoolCacheDeviceMap[1];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCache.emplace_back();
ZeEventPoolCacheMap->insert(
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
new std::list<ze_event_pool_handle_t>;
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
(*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache;
}
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
return (*ZeEventPoolCacheMap)[ZeDevice];
} else {
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
return WithProfiling ? ZeEventPoolCache[0] : ZeEventPoolCache[1];
}
} else {
if (ZeDevice) {
auto ZeEventPoolCacheMap = WithProfiling
? &ZeEventPoolCacheDeviceMap[2]
: &ZeEventPoolCacheDeviceMap[3];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCache.emplace_back();
ZeEventPoolCacheMap->insert(
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
new std::list<ze_event_pool_handle_t>;
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
(*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache;
}
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
return (*ZeEventPoolCacheMap)[ZeDevice];
} else {
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
return WithProfiling ? ZeEventPoolCache[2] : ZeEventPoolCache[3];
}
}
}
Expand Down Expand Up @@ -276,31 +287,42 @@ struct ur_context_handle_t_ : _ur_object {
// Get the cache of events for a provided scope and profiling mode.
auto getEventCache(bool HostVisible, bool WithProfiling,
ur_device_handle_t Device) {
// Adding 4 initial global caches for provided scope and profiling modes:
// Host Scope, Device Scope, with Profiling, without Profiling.
if (EventCaches.empty()) {
for (int i = 0; i < 4; i++) {
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
}
}
if (HostVisible) {
if (Device) {
auto EventCachesMap =
WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1];
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
EventCaches.emplace_back();
EventCachesMap->insert(
std::make_pair(Device, EventCaches.size() - 1));
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
(*EventCachesMap)[Device] = deviceEventCache;
}
return &EventCaches[(*EventCachesMap)[Device]];
return (*EventCachesMap)[Device];
} else {
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
return WithProfiling ? EventCaches[0] : EventCaches[1];
}
} else {
if (Device) {
auto EventCachesMap =
WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3];
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
EventCaches.emplace_back();
EventCachesMap->insert(
std::make_pair(Device, EventCaches.size() - 1));
std::list<ur_event_handle_t> *deviceEventCache =
new std::list<ur_event_handle_t>;
EventCaches.push_back(deviceEventCache);
(*EventCachesMap)[Device] = deviceEventCache;
}
return &EventCaches[(*EventCachesMap)[Device]];
return (*EventCachesMap)[Device];
} else {
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
return WithProfiling ? EventCaches[2] : EventCaches[3];
}
}
}
Expand Down
Loading
Loading