Skip to content

Commit

Permalink
Libomptarget adapter proof-of-concept
Browse files Browse the repository at this point in the history
  • Loading branch information
callumfare committed Jan 24, 2024
1 parent 8007b22 commit b395eef
Show file tree
Hide file tree
Showing 32 changed files with 5,321 additions and 0 deletions.
4 changes: 4 additions & 0 deletions source/adapters/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,7 @@ endif()
# Build the native CPU adapter when it is requested explicitly or when all
# adapters are enabled.
if(UR_BUILD_ADAPTER_ALL OR UR_BUILD_ADAPTER_NATIVE_CPU)
    add_subdirectory(native_cpu)
endif()

# Build the libomptarget adapter when it is requested explicitly or when all
# adapters are enabled.
if(UR_BUILD_ADAPTER_ALL OR UR_BUILD_ADAPTER_LIBOMPTARGET)
    add_subdirectory(libomptarget)
endif()
4 changes: 4 additions & 0 deletions source/adapters/libomptarget/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
# clang-format configuration for the libomptarget adapter directory.
# This section applies to C++ sources.
Language: Cpp
# Start from clang-format's predefined LLVM style; no overrides are set here.
BasedOnStyle: LLVM
...
68 changes: 68 additions & 0 deletions source/adapters/libomptarget/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (C) 2024 Intel Corporation
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Name of the adapter library target defined in this directory.
set(TARGET_NAME ur_adapter_libomptarget)

# User-supplied locations of the LLVM install tree and of the libomptarget
# headers. Both default to empty and have to be provided on the command line.
set(UR_LIBOMPTARGET_LLVM_DIR
    ""
    CACHE PATH "Directory containing LLVM install")
set(UR_LIBOMPTARGET_INCLUDES_DIR
    ""
    CACHE PATH "Directory containing LLVM headers for libomptarget (e.g. <llvm>/openmp/libomptarget/include)")

# Abort configuration as soon as either location is still empty.
if(UR_LIBOMPTARGET_INCLUDES_DIR STREQUAL ""
   OR UR_LIBOMPTARGET_LLVM_DIR STREQUAL "")
    message(FATAL_ERROR "UR_LIBOMPTARGET_LLVM_DIR and UR_LIBOMPTARGET_INCLUDES_DIR must be defined for the libomptarget adapter")
endif()

# The CUDA toolkit is a hard requirement: the adapter target links against the
# CUDA::cuda_driver imported target and adds CUDAToolkit_INCLUDE_DIRS to its
# include path. REQUIRED makes configuration stop here with a clear message
# when the toolkit is missing, instead of failing later with an unresolved
# CUDA::cuda_driver target.
find_package(CUDAToolkit REQUIRED)

# Define the adapter as a shared library via the project's add_ur_adapter()
# helper. Sources and headers are listed explicitly with absolute paths rooted
# at this directory.
add_ur_adapter(${TARGET_NAME} SHARED
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/context.hpp
${CMAKE_CURRENT_SOURCE_DIR}/context.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device.hpp
${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
${CMAKE_CURRENT_SOURCE_DIR}/enqueue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
${CMAKE_CURRENT_SOURCE_DIR}/event.hpp
${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel.hpp
${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/memory.hpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program.cpp
${CMAKE_CURRENT_SOURCE_DIR}/program.hpp
${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp
${CMAKE_CURRENT_SOURCE_DIR}/usm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp
# Files from the ur/ directory two levels up, compiled into this adapter.
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp
)

# Stamp the shared library with the project version: the full
# major.minor.patch triple as VERSION and the major number as SOVERSION.
set_property(TARGET ${TARGET_NAME} PROPERTY
    VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}")
set_property(TARGET ${TARGET_NAME} PROPERTY
    SOVERSION "${PROJECT_VERSION_MAJOR}")

# Project-internal targets plus the CUDA driver imported target.
target_link_libraries(${TARGET_NAME} PRIVATE
    ${PROJECT_NAME}::headers
    ${PROJECT_NAME}::common
    CUDA::cuda_driver
)

# libomptarget and its CUDA "nextgen" plugin, linked by full path from the
# lib/ directory of the user-provided LLVM install.
target_link_libraries(${TARGET_NAME} PRIVATE
    ${UR_LIBOMPTARGET_LLVM_DIR}/lib/libomptarget.so
    ${UR_LIBOMPTARGET_LLVM_DIR}/lib/libomptarget.rtl.cuda.nextgen.so
)

# Header search path, in order: the directory two levels above this one,
# the LLVM install headers, the libomptarget headers and the CUDA toolkit
# headers. All are private to the adapter.
target_include_directories(${TARGET_NAME} PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/../../"
)
target_include_directories(${TARGET_NAME} PRIVATE
    ${UR_LIBOMPTARGET_LLVM_DIR}/include
    ${UR_LIBOMPTARGET_INCLUDES_DIR}
)
target_include_directories(${TARGET_NAME} PRIVATE
    ${CUDAToolkit_INCLUDE_DIRS}
)
34 changes: 34 additions & 0 deletions source/adapters/libomptarget/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# libomptarget adapter
This adapter is a very rough proof-of-concept which was implemented to carry out
gap analysis between Unified Runtime and the libomptarget plugin interfaces.
There is enough functionality to run some basic SYCL programs of certain work
sizes.

There were a lot of gaps found and generally these have been documented in the
adapter code. The adapter is hard-coded to use the CUDA libomptarget plugin,
and as libomptarget lacks a way to programmatically query information about the
context and device, we have hard-coded it to use CUDA directly for these
features. If we did not do this then no SYCL application would be able to work.


## Building

An LLVM build with the `openmp` project enabled is required. This adapter was
developed in August 2023 and as such will likely not work with the most up to
date LLVM. To check out a known good version:
```
git clone https://github.com/llvm/llvm-project/
cd llvm-project
git checkout af35be5
```

Once LLVM with OpenMP is built, the adapter can be enabled:

```
cmake .. -DUR_BUILD_ADAPTER_LIBOMPTARGET=ON \
    -DUR_LIBOMPTARGET_LLVM_DIR=<llvm install dir> \
    -DUR_LIBOMPTARGET_INCLUDES_DIR=<llvm source dir>/openmp/libomptarget/include
```

* `UR_LIBOMPTARGET_LLVM_DIR` should point to the install directory of your LLVM build

* `UR_LIBOMPTARGET_INCLUDES_DIR` should point to the directory containing `omptargetplugin.h` in the LLVM source
71 changes: 71 additions & 0 deletions source/adapters/libomptarget/adapter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
//===--------- adapter.cpp - Libomptarget Adapter --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------===//

#include "adapter.hpp"

// The single adapter instance for this library. urAdapterGet hands out its
// address, and the retain/release entry points operate on its RefCount and
// Mutex regardless of the handle argument they receive.
ur_adapter_handle_t_ adapter{};

/// Reports the one adapter exposed by this library.
///
/// If the caller provides output storage (NumEntries > 0 and a non-null
/// phAdapters), the address of the global adapter is written to the first
/// slot and its reference count is incremented. If pNumAdapters is non-null
/// it always receives 1. The function always returns UR_RESULT_SUCCESS.
///
/// NOTE(review): this path raises RefCount without calling
/// __tgt_rtl_init_plugin(), while urAdapterRetain only initialises the plugin
/// on a 0 -> 1 transition and urAdapterRelease deinitialises it on 1 -> 0.
/// Confirm that the plugin is initialised somewhere else.
UR_APIEXPORT ur_result_t UR_APICALL
urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters,
             uint32_t *pNumAdapters) {
  if (pNumAdapters != nullptr) {
    *pNumAdapters = 1;
  }

  const bool HasOutputSlot = NumEntries > 0 && phAdapters != nullptr;
  if (HasOutputSlot) {
    *phAdapters = &adapter;
    adapter.RefCount.fetch_add(1);
  }

  return UR_RESULT_SUCCESS;
}

/// Adds one reference to the global adapter while holding its mutex.
///
/// The handle argument is ignored. When the count moves from 0 to 1 the
/// libomptarget plugin is initialised via __tgt_rtl_init_plugin(), and a
/// failure there is propagated through OMPT_RETURN_ON_FAILURE.
UR_APIEXPORT ur_result_t UR_APICALL
urAdapterRetain([[maybe_unused]] ur_adapter_handle_t) {
  std::lock_guard<std::mutex> Guard{adapter.Mutex};

  const uint32_t PreviousCount = adapter.RefCount.fetch_add(1);
  if (PreviousCount == 0) {
    OMPT_RETURN_ON_FAILURE(__tgt_rtl_init_plugin());
  }

  return UR_RESULT_SUCCESS;
}

/// Drops one reference from the global adapter while holding its mutex.
///
/// The handle argument is ignored. When the count moves from 1 to 0 the
/// libomptarget plugin is shut down via __tgt_rtl_deinit_plugin(), and a
/// failure there is propagated through OMPT_RETURN_ON_FAILURE.
UR_APIEXPORT ur_result_t UR_APICALL
urAdapterRelease([[maybe_unused]] ur_adapter_handle_t) {
  std::lock_guard<std::mutex> Guard{adapter.Mutex};

  const uint32_t PreviousCount = adapter.RefCount.fetch_sub(1);
  if (PreviousCount == 1) {
    OMPT_RETURN_ON_FAILURE(__tgt_rtl_deinit_plugin());
  }

  return UR_RESULT_SUCCESS;
}

/// Copies the adapter's most recent error state to the caller.
///
/// Writes omptarget_adapter::ErrorMessageCode to *pError and
/// omptarget_adapter::ErrorMessage to *ppMessage. The handle argument is
/// ignored. Both output pointers are dereferenced unconditionally, so the
/// caller must pass valid pointers. Always returns UR_RESULT_SUCCESS.
UR_APIEXPORT ur_result_t UR_APICALL
urAdapterGetLastError([[maybe_unused]] ur_adapter_handle_t,
                      const char **ppMessage, int32_t *pError) {
  *pError = omptarget_adapter::ErrorMessageCode;
  *ppMessage = omptarget_adapter::ErrorMessage;
  return UR_RESULT_SUCCESS;
}

/// Answers adapter property queries.
///
/// Supported properties:
///  - UR_ADAPTER_INFO_BACKEND: reported as UR_ADAPTER_BACKEND_CUDA.
///  - UR_ADAPTER_INFO_REFERENCE_COUNT: current value of the global adapter's
///    reference count.
/// Any other property yields UR_RESULT_ERROR_INVALID_ENUMERATION. The handle
/// argument is ignored; results are delivered through UrReturnHelper using
/// propSize, pPropValue and pPropSizeRet.
UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(
    [[maybe_unused]] ur_adapter_handle_t, ur_adapter_info_t propName,
    size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

  if (propName == UR_ADAPTER_INFO_BACKEND) {
    /* There is no dedicated libomptarget backend enumerator at the moment;
       the adapter reports itself as the CUDA backend. */
    return ReturnValue(UR_ADAPTER_BACKEND_CUDA);
  }

  if (propName == UR_ADAPTER_INFO_REFERENCE_COUNT) {
    return ReturnValue(adapter.RefCount.load());
  }

  return UR_RESULT_ERROR_INVALID_ENUMERATION;
}
17 changes: 17 additions & 0 deletions source/adapters/libomptarget/adapter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//===--------- adapter.hpp - Libomptarget Adapter --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------===//
#pragma once

#include "common.hpp"

/// State behind an adapter handle.
struct ur_adapter_handle_t_ {
  /// Number of outstanding references; starts at zero.
  std::atomic<uint32_t> RefCount{0};
  /// Held by urAdapterRetain / urAdapterRelease while they update RefCount.
  std::mutex Mutex;
};

/// The adapter instance, defined in adapter.cpp.
extern ur_adapter_handle_t_ adapter;
169 changes: 169 additions & 0 deletions source/adapters/libomptarget/command_buffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//===--------- command_buffer.cpp - Libomptarget Adapter --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------===//

#include "common.hpp"

/// Stub implementations of UR experimental feature command-buffers

/// Stub for urCommandBufferCreateExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm
/// against its definition).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
[[maybe_unused]] ur_context_handle_t hContext,
[[maybe_unused]] ur_device_handle_t hDevice,
[[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
[[maybe_unused]] ur_exp_command_buffer_handle_t *phCommandBuffer) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferRetainExp: the handle is not used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferReleaseExp: the handle is not used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferFinalizeExp: the handle is not used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferAppendKernelLaunchExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] ur_kernel_handle_t hKernel,
[[maybe_unused]] uint32_t workDim,
[[maybe_unused]] const size_t *pGlobalWorkOffset,
[[maybe_unused]] const size_t *pGlobalWorkSize,
[[maybe_unused]] const size_t *pLocalWorkSize,
[[maybe_unused]] uint32_t numSyncPointsInWaitList,
[[maybe_unused]] const ur_exp_command_buffer_sync_point_t
*pSyncPointWaitList,
[[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferAppendMemcpyUSMExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc,
[[maybe_unused]] size_t size,
[[maybe_unused]] uint32_t numSyncPointsInWaitList,
[[maybe_unused]] const ur_exp_command_buffer_sync_point_t
*pSyncPointWaitList,
[[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferAppendMembufferCopyExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] ur_mem_handle_t hSrcMem,
[[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset,
[[maybe_unused]] size_t dstOffset, [[maybe_unused]] size_t size,
[[maybe_unused]] uint32_t numSyncPointsInWaitList,
[[maybe_unused]] const ur_exp_command_buffer_sync_point_t
*pSyncPointWaitList,
[[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferAppendMembufferCopyRectExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] ur_mem_handle_t hSrcMem,
[[maybe_unused]] ur_mem_handle_t hDstMem,
[[maybe_unused]] ur_rect_offset_t srcOrigin,
[[maybe_unused]] ur_rect_offset_t dstOrigin,
[[maybe_unused]] ur_rect_region_t region,
[[maybe_unused]] size_t srcRowPitch, [[maybe_unused]] size_t srcSlicePitch,
[[maybe_unused]] size_t dstRowPitch, [[maybe_unused]] size_t dstSlicePitch,
[[maybe_unused]] uint32_t numSyncPointsInWaitList,
[[maybe_unused]] const ur_exp_command_buffer_sync_point_t
*pSyncPointWaitList,
[[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferAppendMembufferWriteExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT
ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset,
[[maybe_unused]] size_t size, [[maybe_unused]] const void *pSrc,
[[maybe_unused]] uint32_t numSyncPointsInWaitList,
[[maybe_unused]] const ur_exp_command_buffer_sync_point_t
*pSyncPointWaitList,
[[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferAppendMembufferReadExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT
ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset,
[[maybe_unused]] size_t size, [[maybe_unused]] void *pDst,
[[maybe_unused]] uint32_t numSyncPointsInWaitList,
[[maybe_unused]] const ur_exp_command_buffer_sync_point_t
*pSyncPointWaitList,
[[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferAppendMembufferWriteRectExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT
ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] ur_mem_handle_t hBuffer,
[[maybe_unused]] ur_rect_offset_t bufferOffset,
[[maybe_unused]] ur_rect_offset_t hostOffset,
[[maybe_unused]] ur_rect_region_t region,
[[maybe_unused]] size_t bufferRowPitch,
[[maybe_unused]] size_t bufferSlicePitch,
[[maybe_unused]] size_t hostRowPitch,
[[maybe_unused]] size_t hostSlicePitch, [[maybe_unused]] void *pSrc,
[[maybe_unused]] uint32_t numSyncPointsInWaitList,
[[maybe_unused]] const ur_exp_command_buffer_sync_point_t
*pSyncPointWaitList,
[[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferAppendMembufferReadRectExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT
ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] ur_mem_handle_t hBuffer,
[[maybe_unused]] ur_rect_offset_t bufferOffset,
[[maybe_unused]] ur_rect_offset_t hostOffset,
[[maybe_unused]] ur_rect_region_t region,
[[maybe_unused]] size_t bufferRowPitch,
[[maybe_unused]] size_t bufferSlicePitch,
[[maybe_unused]] size_t hostRowPitch,
[[maybe_unused]] size_t hostSlicePitch, [[maybe_unused]] void *pDst,
[[maybe_unused]] uint32_t numSyncPointsInWaitList,
[[maybe_unused]] const ur_exp_command_buffer_sync_point_t
*pSyncPointWaitList,
[[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Stub for urCommandBufferEnqueueExp: no argument is used.
/// Invokes OMPT_DIE("Feature is not implemented"); the return value below is
/// only observed if that macro returns (presumably it aborts - TODO confirm).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
[[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer,
[[maybe_unused]] ur_queue_handle_t hQueue,
[[maybe_unused]] uint32_t numEventsInWaitList,
[[maybe_unused]] const ur_event_handle_t *phEventWaitList,
[[maybe_unused]] ur_event_handle_t *phEvent) {
OMPT_DIE("Feature is not implemented");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
Loading

0 comments on commit b395eef

Please sign in to comment.