Skip to content

Commit

Permalink
Merge pull request oneapi-src#1252 from pbalcer/l0-platform-hang
Browse files Browse the repository at this point in the history
[L0] move platform cache into the adapter structure
  • Loading branch information
pbalcer authored and aarongreig committed Feb 9, 2024
1 parent fcc8d9f commit f5efeac
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 146 deletions.
8 changes: 0 additions & 8 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -209,15 +209,7 @@ jobs:
working-directory: ${{github.workspace}}/build
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" --timeout 180

# Temporarily disabling platform test for L0, because of hang
# See issue: #824
- name: Test L0 adapter
if: matrix.adapter.name == 'L0'
working-directory: ${{github.workspace}}/build
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "platform-adapter_level_zero" --timeout 180

- name: Test adapters
if: matrix.adapter.name != 'L0'
working-directory: ${{github.workspace}}/build
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" --timeout 180

Expand Down
100 changes: 83 additions & 17 deletions source/adapters/level_zero/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,92 @@

ur_adapter_handle_t_ Adapter{};

UR_APIEXPORT ur_result_t UR_APICALL
urInit(ur_device_init_flags_t
DeviceFlags, ///< [in] device initialization flags.
///< must be 0 (default) or a combination of
///< ::ur_device_init_flag_t.
ur_loader_config_handle_t) {
std::ignore = DeviceFlags;
ur_result_t initPlatforms(PlatformVec &platforms) noexcept try {
uint32_t ZeDriverCount = 0;
ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr));
if (ZeDriverCount == 0) {
return UR_RESULT_SUCCESS;
}

std::vector<ze_driver_handle_t> ZeDrivers;
ZeDrivers.resize(ZeDriverCount);

ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data()));
for (uint32_t I = 0; I < ZeDriverCount; ++I) {
auto platform = std::make_unique<ur_platform_handle_t_>(ZeDrivers[I]);
UR_CALL(platform->initialize());

// Save a copy in the cache for future uses.
platforms.push_back(std::move(platform));
}
return UR_RESULT_SUCCESS;
} catch (...) {
return exceptionToResult(std::current_exception());
}

ur_result_t adapterStateTeardown() {
// reclaim ur_platform_handle_t objects here since we don't have
// urPlatformRelease.
for (ur_platform_handle_t Platform : *URPlatformsCache) {
delete Platform;
ur_result_t adapterStateInit() {
static std::once_flag ZeCallCountInitialized;
try {
std::call_once(ZeCallCountInitialized, []() {
if (UrL0LeaksDebug) {
ZeCallCount = new std::map<std::string, int>;
}
});
} catch (const std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
delete URPlatformsCache;
delete URPlatformsCacheMutex;

// initialize level zero only once.
if (Adapter.ZeResult == std::nullopt) {
// Setting these environment variables before running zeInit will enable the
// validation layer in the Level Zero loader.
if (UrL0Debug & UR_L0_DEBUG_VALIDATION) {
setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1");
setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1");
}

if (getenv("SYCL_ENABLE_PCI") != nullptr) {
urPrint("WARNING: SYCL_ENABLE_PCI is deprecated and no longer needed.\n");
}

// TODO: We can still safely recover if something goes wrong during the
// init. Implement handling segfault using sigaction.

// We must only initialize the driver once, even if urPlatformGet() is
// called multiple times. Declaring the return value as "static" ensures
// it's only called once.
Adapter.ZeResult = ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));
}

Adapter.PlatformCache.Compute = [](Result<PlatformVec> &result) {
assert(Adapter.ZeResult !=
std::nullopt); // verify that level-zero is initialized
PlatformVec platforms;

// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
if (*Adapter.ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
result = std::move(platforms);
return;
}
if (*Adapter.ZeResult != ZE_RESULT_SUCCESS) {
urPrint("zeInit: Level Zero initialization failure\n");
result = ze2urResult(*Adapter.ZeResult);
return;
}

ur_result_t err = initPlatforms(platforms);
if (err == UR_RESULT_SUCCESS) {
result = std::move(platforms);
} else {
result = err;
}
};
return UR_RESULT_SUCCESS;
}

ur_result_t adapterStateTeardown() {
bool LeakFound = false;

// Print the balance of various create/destroy native calls.
Expand Down Expand Up @@ -144,9 +210,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
) {
if (NumEntries > 0 && Adapters) {
std::lock_guard<std::mutex> Lock{Adapter.Mutex};
// TODO: Some initialization that happens in urPlatformsGet could be moved
// here for when RefCount reaches 1
Adapter.RefCount++;
if (Adapter.RefCount++ == 0) {
adapterStateInit();
}
*Adapters = &Adapter;
}

Expand Down
8 changes: 8 additions & 0 deletions source/adapters/level_zero/adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,18 @@

#include <atomic>
#include <mutex>
#include <optional>
#include <ur/ur.hpp>
#include <ze_api.h>

using PlatformVec = std::vector<std::unique_ptr<ur_platform_handle_t_>>;

struct ur_adapter_handle_t_ {
std::atomic<uint32_t> RefCount = 0;
std::mutex Mutex;

std::optional<ze_result_t> ZeResult;
ZeCache<Result<PlatformVec>> PlatformCache;
};

extern ur_adapter_handle_t_ Adapter;
24 changes: 12 additions & 12 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//===----------------------------------------------------------------------===//

#include "device.hpp"
#include "adapter.hpp"
#include "ur_level_zero.hpp"
#include "ur_util.hpp"
#include <algorithm>
Expand Down Expand Up @@ -1318,21 +1319,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
// Level Zero devices when we initialized the platforms/devices cache, so the
// "NativeHandle" must already be in the cache. If it is not, this must not be
// a valid Level Zero device.
//
// TODO: maybe we should populate cache of platforms if it wasn't already.
// For now assert that is was populated.
UR_ASSERT(URPlatformCachePopulated, UR_RESULT_ERROR_INVALID_VALUE);
const std::lock_guard<SpinLock> Lock{*URPlatformsCacheMutex};

ur_device_handle_t Dev = nullptr;
for (ur_platform_handle_t ThePlatform : *URPlatformsCache) {
Dev = ThePlatform->getDeviceFromNativeHandle(ZeDevice);
if (Dev) {
// Check that the input Platform, if was given, matches the found one.
UR_ASSERT(!Platform || Platform == ThePlatform,
UR_RESULT_ERROR_INVALID_PLATFORM);
break;
if (const auto *platforms = Adapter.PlatformCache->get_value()) {
for (const auto &p : *platforms) {
Dev = p->getDeviceFromNativeHandle(ZeDevice);
if (Dev) {
// Check that the input Platform, if was given, matches the found one.
UR_ASSERT(!Platform || Platform == p.get(),
UR_RESULT_ERROR_INVALID_PLATFORM);
break;
}
}
} else {
return Adapter.PlatformCache->get_error();
}

if (Dev == nullptr)
Expand Down
105 changes: 12 additions & 93 deletions source/adapters/level_zero/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,101 +27,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(
uint32_t *NumPlatforms ///< [out][optional] returns the total number of
///< platforms available.
) {
static std::once_flag ZeCallCountInitialized;
try {
std::call_once(ZeCallCountInitialized, []() {
if (UrL0LeaksDebug) {
ZeCallCount = new std::map<std::string, int>;
}
});
} catch (const std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

// Setting these environment variables before running zeInit will enable the
// validation layer in the Level Zero loader.
if (UrL0Debug & UR_L0_DEBUG_VALIDATION) {
setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1");
setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1");
}

if (getenv("SYCL_ENABLE_PCI") != nullptr) {
urPrint("WARNING: SYCL_ENABLE_PCI is deprecated and no longer needed.\n");
}

// TODO: We can still safely recover if something goes wrong during the init.
// Implement handling segfault using sigaction.

// We must only initialize the driver once, even if urPlatformGet() is called
// multiple times. Declaring the return value as "static" ensures it's only
// called once.
static ze_result_t ZeResult =
ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));

// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
UR_ASSERT(NumEntries == 0, UR_RESULT_ERROR_INVALID_VALUE);
if (NumPlatforms)
*NumPlatforms = 0;
return UR_RESULT_SUCCESS;
}

if (ZeResult != ZE_RESULT_SUCCESS) {
urPrint("zeInit: Level Zero initialization failure\n");
return ze2urResult(ZeResult);
}

// Cache ur_platform_handle_t for reuse in the future
// It solves two problems;
// 1. sycl::platform equality issue; we always return the same
// ur_platform_handle_t
// 2. performance; we can save time by immediately return from cache.
//

const std::lock_guard<SpinLock> Lock{*URPlatformsCacheMutex};
if (!URPlatformCachePopulated) {
try {
// Level Zero does not have concept of Platforms, but Level Zero driver is
// the closest match.
uint32_t ZeDriverCount = 0;
ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr));
if (ZeDriverCount == 0) {
URPlatformCachePopulated = true;
} else {
std::vector<ze_driver_handle_t> ZeDrivers;
ZeDrivers.resize(ZeDriverCount);

ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data()));
for (uint32_t I = 0; I < ZeDriverCount; ++I) {
auto Platform = new ur_platform_handle_t_(ZeDrivers[I]);
// Save a copy in the cache for future uses.
URPlatformsCache->push_back(Platform);

UR_CALL(Platform->initialize());
}
URPlatformCachePopulated = true;
// Platform handles are cached for reuse. This is to ensure consistent
// handle pointers across invocations and to improve retrieval performance.
if (const auto *cached_platforms = Adapter.PlatformCache->get_value()) {
uint32_t nplatforms = (uint32_t)cached_platforms->size();
if (NumPlatforms) {
*NumPlatforms = nplatforms;
}
if (Platforms) {
for (uint32_t i = 0; i < std::min(nplatforms, NumEntries); ++i) {
Platforms[i] = cached_platforms->at(i).get();
}
} catch (const std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
}

// Populate returned platforms from the cache.
if (Platforms) {
UR_ASSERT(NumEntries <= URPlatformsCache->size(),
UR_RESULT_ERROR_INVALID_PLATFORM);
std::copy_n(URPlatformsCache->begin(), NumEntries, Platforms);
}

if (NumPlatforms) {
if (*NumPlatforms == 0)
*NumPlatforms = URPlatformsCache->size();
else
*NumPlatforms = (std::min)(URPlatformsCache->size(), (size_t)NumEntries);
} else {
return Adapter.PlatformCache->get_error();
}

return UR_RESULT_SUCCESS;
Expand Down
6 changes: 0 additions & 6 deletions source/ur/ur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,3 @@ bool PrintTrace = [] {
}
return false;
}();

// Apparatus for maintaining immutable cache of platforms.
std::vector<ur_platform_handle_t> *URPlatformsCache =
new std::vector<ur_platform_handle_t>;
SpinLock *URPlatformsCacheMutex = new SpinLock;
bool URPlatformCachePopulated = false;
31 changes: 21 additions & 10 deletions source/ur/ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <shared_mutex>
#include <string>
#include <thread>
#include <variant>
#include <vector>

#include <ur_api.h>
Expand Down Expand Up @@ -176,16 +177,6 @@ struct _ur_platform {};
// Controls tracing UR calls from within the UR itself.
extern bool PrintTrace;

// Apparatus for maintaining immutable cache of platforms.
//
// Note we only create a simple pointer variables such that C++ RT won't
// deallocate them automatically at the end of the main program.
// The heap memory allocated for these global variables reclaimed only at
// explicit tear-down.
extern std::vector<ur_platform_handle_t> *URPlatformsCache;
extern SpinLock *URPlatformsCacheMutex;
extern bool URPlatformCachePopulated;

// The getInfo*/ReturnHelper facilities provide shortcut way of
// writing return bytes for the various getInfo APIs.
namespace ur {
Expand Down Expand Up @@ -295,3 +286,23 @@ class UrReturnHelper {
void *param_value;
size_t *param_value_size_ret;
};

template <typename T> class Result {
public:
Result(ur_result_t err) : value_or_err(err) {}
Result(T value) : value_or_err(std::move(value)) {}
Result() : value_or_err(UR_RESULT_ERROR_UNINITIALIZED) {}

bool is_err() { return std::holds_alternative<ur_result_t>(value_or_err); }
explicit operator bool() const { return !is_err(); }

const T *get_value() { return std::get_if<T>(&value_or_err); }

ur_result_t get_error() {
auto *err = std::get_if<ur_result_t>(&value_or_err);
return err ? *err : UR_RESULT_SUCCESS;
}

private:
std::variant<ur_result_t, T> value_or_err;
};

0 comments on commit f5efeac

Please sign in to comment.