Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[L0] move platform cache into the adapter structure #1252

Merged
merged 1 commit into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,7 @@ jobs:
working-directory: ${{github.workspace}}/build
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" --timeout 180

# Temporarily disabling platform test for L0, because of hang
# See issue: #824
- name: Test L0 adapter
if: matrix.adapter.name == 'L0'
working-directory: ${{github.workspace}}/build
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "platform-adapter_level_zero" --timeout 180

- name: Test adapters
if: matrix.adapter.name != 'L0'
working-directory: ${{github.workspace}}/build
run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180

Expand Down
97 changes: 87 additions & 10 deletions source/adapters/level_zero/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,92 @@

ur_adapter_handle_t_ Adapter{};

ur_result_t adapterStateTeardown() {
// reclaim ur_platform_handle_t objects here since we don't have
// urPlatformRelease.
for (ur_platform_handle_t Platform : *URPlatformsCache) {
delete Platform;
ur_result_t initPlatforms(PlatformVec &platforms) noexcept try {
uint32_t ZeDriverCount = 0;
ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr));
if (ZeDriverCount == 0) {
return UR_RESULT_SUCCESS;
}

std::vector<ze_driver_handle_t> ZeDrivers;
ZeDrivers.resize(ZeDriverCount);

ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data()));
for (uint32_t I = 0; I < ZeDriverCount; ++I) {
auto platform = std::make_unique<ur_platform_handle_t_>(ZeDrivers[I]);
UR_CALL(platform->initialize());

// Save a copy in the cache for future uses.
platforms.push_back(std::move(platform));
}
return UR_RESULT_SUCCESS;
} catch (...) {
return exceptionToResult(std::current_exception());
}

ur_result_t adapterStateInit() {
static std::once_flag ZeCallCountInitialized;
try {
std::call_once(ZeCallCountInitialized, []() {
if (UrL0LeaksDebug) {
ZeCallCount = new std::map<std::string, int>;
}
});
} catch (const std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
delete URPlatformsCache;
delete URPlatformsCacheMutex;

// initialize level zero only once.
if (Adapter.ZeResult == std::nullopt) {
// Setting these environment variables before running zeInit will enable the
// validation layer in the Level Zero loader.
if (UrL0Debug & UR_L0_DEBUG_VALIDATION) {
setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1");
setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1");
}

if (getenv("SYCL_ENABLE_PCI") != nullptr) {
urPrint("WARNING: SYCL_ENABLE_PCI is deprecated and no longer needed.\n");
}

// TODO: We can still safely recover if something goes wrong during the
// init. Implement handling segfault using sigaction.

// We must only initialize the driver once, even if urPlatformGet() is
// called multiple times. Declaring the return value as "static" ensures
// it's only called once.
Adapter.ZeResult = ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));
}

Adapter.PlatformCache.Compute = [](Result<PlatformVec> &result) {
assert(Adapter.ZeResult !=
std::nullopt); // verify that level-zero is initialized
PlatformVec platforms;

// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
if (*Adapter.ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
result = std::move(platforms);
return;
}
if (*Adapter.ZeResult != ZE_RESULT_SUCCESS) {
urPrint("zeInit: Level Zero initialization failure\n");
result = ze2urResult(*Adapter.ZeResult);
return;
}

ur_result_t err = initPlatforms(platforms);
if (err == UR_RESULT_SUCCESS) {
result = std::move(platforms);
} else {
result = err;
}
};
return UR_RESULT_SUCCESS;
}

ur_result_t adapterStateTeardown() {
bool LeakFound = false;

// Print the balance of various create/destroy native calls.
Expand Down Expand Up @@ -126,9 +203,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
) {
if (NumEntries > 0 && Adapters) {
std::lock_guard<std::mutex> Lock{Adapter.Mutex};
// TODO: Some initialization that happens in urPlatformsGet could be moved
// here for when RefCount reaches 1
Adapter.RefCount++;
if (Adapter.RefCount++ == 0) {
adapterStateInit();
}
*Adapters = &Adapter;
}

Expand Down
8 changes: 8 additions & 0 deletions source/adapters/level_zero/adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,18 @@

#include <atomic>
#include <mutex>
#include <optional>
#include <ur/ur.hpp>
#include <ze_api.h>

using PlatformVec = std::vector<std::unique_ptr<ur_platform_handle_t_>>;

struct ur_adapter_handle_t_ {
std::atomic<uint32_t> RefCount = 0;
std::mutex Mutex;

std::optional<ze_result_t> ZeResult;
ZeCache<Result<PlatformVec>> PlatformCache;
};

extern ur_adapter_handle_t_ Adapter;
24 changes: 12 additions & 12 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//===----------------------------------------------------------------------===//

#include "device.hpp"
#include "adapter.hpp"
#include "ur_level_zero.hpp"
#include "ur_util.hpp"
#include <algorithm>
Expand Down Expand Up @@ -1321,21 +1322,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
// Level Zero devices when we initialized the platforms/devices cache, so the
// "NativeHandle" must already be in the cache. If it is not, this must not be
// a valid Level Zero device.
//
// TODO: maybe we should populate cache of platforms if it wasn't already.
// For now assert that is was populated.
UR_ASSERT(URPlatformCachePopulated, UR_RESULT_ERROR_INVALID_VALUE);
const std::lock_guard<SpinLock> Lock{*URPlatformsCacheMutex};

ur_device_handle_t Dev = nullptr;
for (ur_platform_handle_t ThePlatform : *URPlatformsCache) {
Dev = ThePlatform->getDeviceFromNativeHandle(ZeDevice);
if (Dev) {
// Check that the input Platform, if was given, matches the found one.
UR_ASSERT(!Platform || Platform == ThePlatform,
UR_RESULT_ERROR_INVALID_PLATFORM);
break;
if (const auto *platforms = Adapter.PlatformCache->get_value()) {
for (const auto &p : *platforms) {
Dev = p->getDeviceFromNativeHandle(ZeDevice);
if (Dev) {
// Check that the input Platform, if was given, matches the found one.
UR_ASSERT(!Platform || Platform == p.get(),
UR_RESULT_ERROR_INVALID_PLATFORM);
break;
}
}
} else {
return Adapter.PlatformCache->get_error();
}

if (Dev == nullptr)
Expand Down
105 changes: 12 additions & 93 deletions source/adapters/level_zero/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,101 +27,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(
uint32_t *NumPlatforms ///< [out][optional] returns the total number of
///< platforms available.
) {
static std::once_flag ZeCallCountInitialized;
try {
std::call_once(ZeCallCountInitialized, []() {
if (UrL0LeaksDebug) {
ZeCallCount = new std::map<std::string, int>;
}
});
} catch (const std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

// Setting these environment variables before running zeInit will enable the
// validation layer in the Level Zero loader.
if (UrL0Debug & UR_L0_DEBUG_VALIDATION) {
setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1");
setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1");
}

if (getenv("SYCL_ENABLE_PCI") != nullptr) {
urPrint("WARNING: SYCL_ENABLE_PCI is deprecated and no longer needed.\n");
}

// TODO: We can still safely recover if something goes wrong during the init.
// Implement handling segfault using sigaction.

// We must only initialize the driver once, even if urPlatformGet() is called
// multiple times. Declaring the return value as "static" ensures it's only
// called once.
static ze_result_t ZeResult =
ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));

// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
UR_ASSERT(NumEntries == 0, UR_RESULT_ERROR_INVALID_VALUE);
if (NumPlatforms)
*NumPlatforms = 0;
return UR_RESULT_SUCCESS;
}

if (ZeResult != ZE_RESULT_SUCCESS) {
urPrint("zeInit: Level Zero initialization failure\n");
return ze2urResult(ZeResult);
}

// Cache ur_platform_handle_t for reuse in the future
// It solves two problems;
// 1. sycl::platform equality issue; we always return the same
// ur_platform_handle_t
// 2. performance; we can save time by immediately return from cache.
//

const std::lock_guard<SpinLock> Lock{*URPlatformsCacheMutex};
if (!URPlatformCachePopulated) {
try {
// Level Zero does not have concept of Platforms, but Level Zero driver is
// the closest match.
uint32_t ZeDriverCount = 0;
ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr));
if (ZeDriverCount == 0) {
URPlatformCachePopulated = true;
} else {
std::vector<ze_driver_handle_t> ZeDrivers;
ZeDrivers.resize(ZeDriverCount);

ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data()));
for (uint32_t I = 0; I < ZeDriverCount; ++I) {
auto Platform = new ur_platform_handle_t_(ZeDrivers[I]);
// Save a copy in the cache for future uses.
URPlatformsCache->push_back(Platform);

UR_CALL(Platform->initialize());
}
URPlatformCachePopulated = true;
// Platform handles are cached for reuse. This is to ensure consistent
// handle pointers across invocations and to improve retrieval performance.
if (const auto *cached_platforms = Adapter.PlatformCache->get_value()) {
uint32_t nplatforms = (uint32_t)cached_platforms->size();
if (NumPlatforms) {
*NumPlatforms = nplatforms;
}
if (Platforms) {
for (uint32_t i = 0; i < std::min(nplatforms, NumEntries); ++i) {
Platforms[i] = cached_platforms->at(i).get();
}
} catch (const std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
}

// Populate returned platforms from the cache.
if (Platforms) {
UR_ASSERT(NumEntries <= URPlatformsCache->size(),
UR_RESULT_ERROR_INVALID_PLATFORM);
std::copy_n(URPlatformsCache->begin(), NumEntries, Platforms);
}

if (NumPlatforms) {
if (*NumPlatforms == 0)
*NumPlatforms = URPlatformsCache->size();
else
*NumPlatforms = (std::min)(URPlatformsCache->size(), (size_t)NumEntries);
} else {
return Adapter.PlatformCache->get_error();
}

return UR_RESULT_SUCCESS;
Expand Down
6 changes: 0 additions & 6 deletions source/ur/ur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,3 @@ bool PrintTrace = [] {
}
return false;
}();

// Apparatus for maintaining immutable cache of platforms.
std::vector<ur_platform_handle_t> *URPlatformsCache =
new std::vector<ur_platform_handle_t>;
SpinLock *URPlatformsCacheMutex = new SpinLock;
bool URPlatformCachePopulated = false;
31 changes: 21 additions & 10 deletions source/ur/ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <shared_mutex>
#include <string>
#include <thread>
#include <variant>
#include <vector>

#include <ur_api.h>
Expand Down Expand Up @@ -191,16 +192,6 @@ struct _ur_platform {};
// Controls tracing UR calls from within the UR itself.
extern bool PrintTrace;

// Apparatus for maintaining immutable cache of platforms.
//
// Note we only create a simple pointer variables such that C++ RT won't
// deallocate them automatically at the end of the main program.
// The heap memory allocated for these global variables reclaimed only at
// explicit tear-down.
extern std::vector<ur_platform_handle_t> *URPlatformsCache;
extern SpinLock *URPlatformsCacheMutex;
extern bool URPlatformCachePopulated;

// The getInfo*/ReturnHelper facilities provide shortcut way of
// writing return bytes for the various getInfo APIs.
namespace ur {
Expand Down Expand Up @@ -310,3 +301,23 @@ class UrReturnHelper {
void *param_value;
size_t *param_value_size_ret;
};

template <typename T> class Result {
public:
Result(ur_result_t err) : value_or_err(err) {}
Result(T value) : value_or_err(std::move(value)) {}
Result() : value_or_err(UR_RESULT_ERROR_UNINITIALIZED) {}

bool is_err() { return std::holds_alternative<ur_result_t>(value_or_err); }
explicit operator bool() const { return !is_err(); }

const T *get_value() { return std::get_if<T>(&value_or_err); }

ur_result_t get_error() {
auto *err = std::get_if<ur_result_t>(&value_or_err);
return err ? *err : UR_RESULT_SUCCESS;
}

private:
std::variant<ur_result_t, T> value_or_err;
};
Loading