Skip to content

Commit

Permalink
Merge pull request #1959 from igchor/v2_enqueue_kernel_separate_adapter
Browse files Browse the repository at this point in the history
[L0 v2] implement enqueueKernelLaunch and queueFinish
  • Loading branch information
igchor authored Aug 20, 2024
2 parents 9c58db2 + df3956a commit 2dcccff
Show file tree
Hide file tree
Showing 34 changed files with 1,291 additions and 831 deletions.
11 changes: 11 additions & 0 deletions source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,15 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp
${CMAKE_CURRENT_SOURCE_DIR}/device.hpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program.cpp
${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
# v2-only sources
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
Expand All @@ -194,8 +198,10 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
Expand All @@ -204,9 +210,11 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_counter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.cpp
)

if(NOT WIN32)
Expand All @@ -220,6 +228,8 @@ if(UR_BUILD_ADAPTER_L0_V2)
)
endif()

target_compile_definitions(ur_adapter_level_zero_v2 PUBLIC UR_ADAPTER_LEVEL_ZERO_V2)

# TODO: fix level_zero adapter conversion warnings
target_compile_options(ur_adapter_level_zero_v2 PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
Expand All @@ -245,6 +255,7 @@ if(UR_BUILD_ADAPTER_L0_V2)

target_include_directories(ur_adapter_level_zero_v2 PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/../.."
"${CMAKE_CURRENT_SOURCE_DIR}/../../ur"
LevelZeroLoader-Headers
)
endif()
1 change: 1 addition & 0 deletions source/adapters/level_zero/command_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "common.hpp"

#include "context.hpp"
#include "kernel.hpp"
#include "queue.hpp"

struct command_buffer_profiling_t {
Expand Down
16 changes: 15 additions & 1 deletion source/adapters/level_zero/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

#include <umf_pools/disjoint_pool_config_parser.hpp>

#include "logger/ur_logger.hpp"

struct _ur_platform_handle_t;

static auto getUrResultString = [](ur_result_t Result) {
Expand Down Expand Up @@ -168,7 +170,7 @@ static auto getUrResultString = [](ur_result_t Result) {
}
};

// Trace an internal PI call; returns in case of an error.
// Trace an internal UR call; returns in case of an error.
#define UR_CALL(Call) \
{ \
if (PrintTrace) \
Expand All @@ -180,6 +182,18 @@ static auto getUrResultString = [](ur_result_t Result) {
return Result; \
}

// Trace an internal UR call; throw in case of an error.
//
// Evaluates `Call` exactly once and stores its value in a block-local
// `Result`. When PrintTrace is set, the stringified call is logged before the
// call, and the stringified call plus getUrResultString(Result) is logged
// after it. Any value other than UR_RESULT_SUCCESS is thrown as a bare
// ur_result_t (not a std::exception), so handlers must catch ur_result_t.
//
// The macro expands to a braced block that declares its own `Result`; a
// `Call` expression that mentions a caller variable named `Result` would bind
// to the macro's variable instead.
#define UR_CALL_THROWS(Call) \
{ \
if (PrintTrace) \
logger::always("UR ---> {}", #Call); \
ur_result_t Result = (Call); \
if (PrintTrace) \
logger::always("UR <--- {}({})", #Call, getUrResultString(Result)); \
if (Result != UR_RESULT_SUCCESS) \
throw Result; \
}

// Controls UR L0 calls tracing.
enum UrDebugLevel {
UR_L0_DEBUG_NONE = 0x0,
Expand Down
9 changes: 9 additions & 0 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -829,3 +829,12 @@ bool ur_context_handle_t_::isValidDevice(ur_device_handle_t Device) const {
}
return false;
}

const std::vector<ur_device_handle_t> &
ur_context_handle_t_::getDevices() const {
return Devices;
}

// Accessor for the Level Zero context handle stored in ZeContext.
ze_context_handle_t ur_context_handle_t_::getZeHandle() const {
  return this->ZeContext;
}
6 changes: 6 additions & 0 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ struct ur_context_handle_t_ : _ur_object {
// Return the Platform, which is the same for all devices in the context
ur_platform_handle_t getPlatform() const;

// Get vector of devices from this context
const std::vector<ur_device_handle_t> &getDevices() const;

// Get index of the free slot in the available pool. If there is no available
// pool then create new one. The HostVisible parameter tells if we need a
// slot for a host-visible event. The ProfilingEnabled tells if we need a
Expand Down Expand Up @@ -303,6 +306,9 @@ struct ur_context_handle_t_ : _ur_object {
// For that the Device or its root devices need to be in the context.
bool isValidDevice(ur_device_handle_t Device) const;

// Get handle to the L0 context
ze_context_handle_t getZeHandle() const;

private:
// Get the cache of events for a provided scope and profiling mode.
auto getEventCache(bool HostVisible, bool WithProfiling,
Expand Down
7 changes: 6 additions & 1 deletion source/adapters/level_zero/helpers/kernel_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@
#include "logger/ur_logger.hpp"

#include "../common.hpp"
#include "../context.hpp"
#include "../device.hpp"

// Pick the context definition that matches the adapter being built.
// UR_ADAPTER_LEVEL_ZERO_V2 is defined only for the v2 adapter target, which
// must use the v2 context; the legacy adapter uses the original context.
#ifdef UR_ADAPTER_LEVEL_ZERO_V2
#include "../v2/context.hpp"
#else
#include "../context.hpp"
#endif

ur_result_t getSuggestedLocalWorkSize(ur_device_handle_t hDevice,
ze_kernel_handle_t hZeKernel,
size_t GlobalWorkSize3D[3],
Expand Down
2 changes: 2 additions & 0 deletions source/adapters/level_zero/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,8 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() {
return UR_RESULT_SUCCESS;
}

// Reports how many entries URDevicesCache currently holds. This only reads
// the cache; it does not populate it.
// NOTE(review): presumably callers run populateDeviceCacheIfNeeded() first -
// confirm, otherwise this may report 0.
size_t ur_platform_handle_t_::getNumDevices() {
  const size_t NumDevices = URDevicesCache.size();
  return NumDevices;
}

ur_device_handle_t ur_platform_handle_t_::getDeviceById(DeviceId id) {
for (auto &dev : URDevicesCache) {
if (dev->Id == id) {
Expand Down
2 changes: 2 additions & 0 deletions source/adapters/level_zero/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ struct ur_platform_handle_t_ : public _ur_platform {
// Check the device cache and load it if necessary.
ur_result_t populateDeviceCacheIfNeeded();

size_t getNumDevices();

ur_device_handle_t getDeviceById(DeviceId);

// Return the PI device from cache that represents given native device.
Expand Down
28 changes: 17 additions & 11 deletions source/adapters/level_zero/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@
//===----------------------------------------------------------------------===//

#include "program.hpp"
#include "device.hpp"
#include "logger/ur_logger.hpp"
#include "ur_level_zero.hpp"

// Pick the context definition that matches the adapter being built.
// UR_ADAPTER_LEVEL_ZERO_V2 is defined only for the v2 adapter target, which
// must use the v2 context; the legacy adapter uses the original context.
#ifdef UR_ADAPTER_LEVEL_ZERO_V2
#include "v2/context.hpp"
#else
#include "context.hpp"
#endif

extern "C" {
// Check to see if a Level Zero module has any unresolved symbols.
Expand Down Expand Up @@ -115,8 +121,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(
const char *Options ///< [in][optional] pointer to build options
///< null-terminated string.
) {
return urProgramBuildExp(Program, Context->Devices.size(),
Context->Devices.data(), Options);
std::vector<ur_device_handle_t> Devices = Context->getDevices();
return urProgramBuildExp(Program, Devices.size(), Devices.data(), Options);
}

UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(
Expand Down Expand Up @@ -174,7 +180,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(

for (uint32_t i = 0; i < numDevices; i++) {
ze_device_handle_t ZeDevice = phDevices[i]->ZeDevice;
ze_context_handle_t ZeContext = hProgram->Context->ZeContext;
ze_context_handle_t ZeContext = hProgram->Context->getZeHandle();
ze_module_handle_t ZeModuleHandle = nullptr;
ze_module_build_log_handle_t ZeBuildLog{};

Expand Down Expand Up @@ -266,7 +272,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile(
// ze-opt-greater-than-4GB-buffer-required to disable
// stateful optimizations and be able to use larger than
// 4GB allocations on these kernels.
if (Context->Devices[0]->useRelaxedAllocationLimits()) {
if (Context->getDevices()[0]->useRelaxedAllocationLimits()) {
Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required";
}
}
Expand All @@ -285,9 +291,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink(
ur_program_handle_t
*Program ///< [out] pointer to handle of program object created.
) {
return urProgramLinkExp(Context, Context->Devices.size(),
Context->Devices.data(), Count, Programs, Options,
Program);
std::vector<ur_device_handle_t> Devices = Context->getDevices();
return urProgramLinkExp(Context, Devices.size(), Devices.data(), Count,
Programs, Options, Program);
}

UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(
Expand Down Expand Up @@ -426,7 +432,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp(

// Call the Level Zero API to compile, link, and create the module.
ze_device_handle_t ZeDevice = phDevices[i]->ZeDevice;
ze_context_handle_t ZeContext = hContext->ZeContext;
ze_context_handle_t ZeContext = hContext->getZeHandle();
ze_module_handle_t ZeModule = nullptr;
ze_module_build_log_handle_t ZeBuildLog = nullptr;
ze_result_t ZeResult =
Expand Down Expand Up @@ -652,7 +658,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(
for (auto &ZeModulePair : Program->ZeModuleMap) {
auto It = Program->ZeModuleMap.find(ZeModulePair.first);
if (It != Program->ZeModuleMap.end()) {
for (auto &Device : Program->Context->Devices) {
for (auto &Device : Program->Context->getDevices()) {
if (Device->ZeDevice == ZeModulePair.first) {
devices.push_back(Device);
}
Expand All @@ -661,7 +667,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(
}
return ReturnValue(devices.data(), devices.size());
} else {
return ReturnValue(Program->Context->Devices[0]);
return ReturnValue(Program->Context->getDevices()[0]);
}
case UR_PROGRAM_INFO_BINARY_SIZES: {
std::shared_lock<ur_shared_mutex> Guard(Program->Mutex);
Expand Down
Loading

0 comments on commit 2dcccff

Please sign in to comment.