Skip to content

Commit

Permalink
Merge pull request #1913 from igchor/separate_adapter
Browse files Browse the repository at this point in the history
[L0 v2] Make L0 v2 implementation a seperate adapter
  • Loading branch information
pbalcer committed Aug 6, 2024
2 parents 9024918 + c5d8106 commit d2ffcce
Show file tree
Hide file tree
Showing 48 changed files with 2,039 additions and 146 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ option(UR_BUILD_ADAPTER_CUDA "Build the CUDA adapter" OFF)
# Opt-in adapter build switches; every one of them defaults to OFF.
option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF)
option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF)
option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF)
# Experimental Level-Zero v2 adapter, declared separately from the other adapter switches.
option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF)
# Miscellaneous opt-in features (examples, validation backtraces, assertions).
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
Expand Down
2 changes: 1 addition & 1 deletion scripts/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ def generate_level_zero_queue_api(path, section, namespace, tags, version, specs

name = "queue_api"
filename = "queue_api.cpp"
layer_dstpath = os.path.join(path, "adapters/level_zero")
layer_dstpath = os.path.join(path, "adapters", "level_zero", "v2")
os.makedirs(layer_dstpath, exist_ok=True)
fout = os.path.join(layer_dstpath, filename)

Expand Down
85 changes: 78 additions & 7 deletions source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,6 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/queue_api.hpp
${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_factory.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/context.cpp
Expand All @@ -136,9 +132,6 @@ add_ur_adapter(${TARGET_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
)

if(NOT WIN32)
Expand Down Expand Up @@ -175,3 +168,81 @@ target_include_directories(${TARGET_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/../../"
LevelZeroLoader-Headers
)

# Experimental Level-Zero v2 adapter.
#
# Built only when UR_BUILD_ADAPTER_L0_V2 is ON. It is a separate shared library
# (ur_adapter_level_zero_v2) that compiles a handful of sources shared with the
# legacy Level-Zero adapter together with the v2-only sources from the v2/
# subdirectory.
if(UR_BUILD_ADAPTER_L0_V2)
    add_ur_adapter(ur_adapter_level_zero_v2
        SHARED
        # sources shared with legacy adapter
        ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/device.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
        # v2-only sources
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_counter.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_counter.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/event.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
    )

    # api.cpp contains NOT_SUPPORTED functions-only, so unused-parameter
    # warnings are silenced for that single file.
    #
    # set_property(SOURCE ... APPEND_STRING ...) is used on purpose:
    # set_source_files_properties() has no APPEND_STRING mode and would treat
    # "APPEND_STRING PROPERTY" as a bogus property name/value pair. The leading
    # space keeps the flag separated from any flags already set on the file.
    # MSVC does not understand -Wno-unused-parameter; its equivalent is /wd4100.
    set_property(SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
        APPEND_STRING PROPERTY COMPILE_FLAGS
        " $<IF:$<CXX_COMPILER_ID:MSVC>,/wd4100,-Wno-unused-parameter>"
    )

    # Extra source added on every platform except Windows.
    if(NOT WIN32)
        target_sources(ur_adapter_level_zero_v2
            PRIVATE
            ${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_linux.cpp
        )
    endif()

    # TODO: fix level_zero adapter conversion warnings
    target_compile_options(ur_adapter_level_zero_v2 PRIVATE
        $<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
    )

    # Version the shared library after the project version.
    set_target_properties(ur_adapter_level_zero_v2 PROPERTIES
        VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
        SOVERSION "${PROJECT_VERSION_MAJOR}"
    )

    if(WIN32)
        # 0x800: Search for the DLL only in the System32 folder
        target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800)
    endif()

    target_link_libraries(ur_adapter_level_zero_v2 PRIVATE
        ${PROJECT_NAME}::headers
        ${PROJECT_NAME}::common
        ${PROJECT_NAME}::umf
        LevelZeroLoader
        LevelZeroLoader-Headers
    )

    # Same include set as the legacy Level-Zero adapter target.
    target_include_directories(ur_adapter_level_zero_v2 PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/../.."
        LevelZeroLoader-Headers
    )
endif()
4 changes: 1 addition & 3 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
#include "queue.hpp"
#include "ur_level_zero.hpp"

#include "v2/context.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
uint32_t DeviceCount, ///< [in] the number of devices given in phDevices
const ur_device_handle_t
Expand All @@ -38,7 +36,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
ZE2UR_CALL(zeContextCreate, (Platform->ZeDriver, &ContextDesc, &ZeContext));
try {
ur_context_handle_t_ *Context =
new v2::ur_context_handle_t_(ZeContext, DeviceCount, Devices, true);
new ur_context_handle_t_(ZeContext, DeviceCount, Devices, true);

Context->initialize();
*RetContext = reinterpret_cast<ur_context_handle_t>(Context);
Expand Down
8 changes: 0 additions & 8 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
#include "ur_util.hpp"
#include "ze_api.h"

#include "v2/queue_factory.hpp"

// Hard limit for the event completion batches.
static const uint64_t CompletionBatchesMax = [] {
// Default value chosen empirically to maximize the number of asynchronous
Expand Down Expand Up @@ -501,12 +499,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(

UR_ASSERT(Context->isValidDevice(Device), UR_RESULT_ERROR_INVALID_DEVICE);

// optimized path for immediate, in-order command lists
if (v2::shouldUseQueueV2(Device, Flags)) {
*Queue = v2::createQueue(Context, Device, Props);
return UR_RESULT_SUCCESS;
}

// Create placeholder queues in the compute queue group.
// Actual L0 queues will be created at first use.
std::vector<ze_command_queue_handle_t> ZeComputeCommandQueues(
Expand Down
9 changes: 5 additions & 4 deletions source/adapters/level_zero/v2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

This is the home directory for L0 v2 adapter sources. This is a redesigned version of the L0 adapter that focuses on maximizing the performance of each queue mode individually (immediate/batched, in-order/out-of-order).

L0 v2 adapter can be enabled by setting `UR_L0_USE_QUEUE_V2=1` env variable. If the variable is not set, legacy path will be used.
The L0 v2 adapter can be enabled by passing the `UR_BUILD_ADAPTER_L0_V2=1` option to cmake. When enabled, `libur_adapter_level_zero_v2.[so|dll]` will be created.

# Code structure

v2 adapter only rewrites certain functions (mostly urEnqueue* functions) while reusing the rest. `ur_queue_handle_t` has become an abstract class and each enqueue function a virtual function.
The v2 adapter is a standalone adapter, but it reuses some logic from the legacy L0 adapter implementation - most notably: adapter.cpp, platform.cpp, device.cpp.

The legacy enqueue path is implemented in `ur_queue_handle_legacy_t`, which inherits from `ur_queue_handle_t`. For the new, optimized path, each queue mode will be implemented as a separate queue class (e.g. `v2::ur_queue_immediate_in_order_t`) inheriting from `ur_queue_handle_t`.
Each queue mode will be implemented as a separate queue class (e.g. `v2::ur_queue_immediate_in_order_t`) inheriting from `ur_queue_handle_t`, which is an abstract class
in the v2 adapter.

`ur_queue_handle_t` is auto-generated by `make generate-code` - for every API function that accepts `ur_queue_handle_t` as a first parameter, new pure virtual method is created. The API function is then
auto-implemented (see ../queue_api.cpp) by dispatching to that virtual method. The developer is only responsible for implementing that virtual function for every queue base class.
auto-implemented (see ./queue_api.cpp) by dispatching to that virtual method. The developer is only responsible for implementing that virtual function for every queue base class.
Loading

0 comments on commit d2ffcce

Please sign in to comment.