Skip to content

Commit

Permalink
Merge branch 'adapters' into eliminate-usage-of-regex-in-opencl
Browse files Browse the repository at this point in the history
  • Loading branch information
omarahmed1111 committed Nov 8, 2023
2 parents 56f6994 + 612a263 commit 2af233a
Show file tree
Hide file tree
Showing 39 changed files with 258 additions and 133 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,10 @@ jobs:
run: LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib
cmake --build ${{github.workspace}}/build -j $(nproc)

- name: Test adapter specific
working-directory: ${{github.workspace}}/build
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" --timeout 180

# Temporarily disabling platform test for L0, because of hang
# See issue: #824
- name: Test L0 adapter
Expand Down
2 changes: 1 addition & 1 deletion examples/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,5 @@ if(LLVM_FOUND AND PkgConfig_FOUND AND LLVMSPIRVLib_FOUND)
)
endif()
else()
message(STATUS "The environment did not satisfy dependency requirements (LLVM, PkgConfig, LLVMSPIRVLib) for codegen example (skipping target).")
message(FATAL_ERROR "The environment did not satisfy dependency requirements (LLVM, PkgConfig, LLVMSPIRVLib) for codegen example (skipping target).")
endif()
21 changes: 21 additions & 0 deletions scripts/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,25 @@ def _mako_info_hpp(path, namespace, tags, version, specs, meta):
specs=specs,
meta=meta)

"""
Entry-point:
generates linker version scripts
"""
def _mako_linker_scripts(path, ext, namespace, tags, version, specs, meta):
    # All linker scripts share the "adapter" base name; `ext` selects the
    # flavour (the visible call sites pass "map" and "def").
    script_name = "adapter"
    out_name = f"{script_name}.{ext}.in"

    # The template sits in templates_dir under the output name plus ".mako";
    # the rendered file is written into `path`.
    template_path = os.path.join(templates_dir, f"{out_name}.mako")
    output_path = os.path.join(path, out_name)
    print("Generating %s..." % output_path)

    # Values made available to the template while rendering.
    render_args = dict(
        name=script_name,
        ver=version,
        namespace=namespace,
        tags=tags,
        specs=specs,
        meta=meta,
    )
    # The result of util.makoWrite is passed straight back; callers add it to
    # a running total (presumably a generated-line count -- confirm in util).
    return util.makoWrite(template_path, output_path, **render_args)

"""
Entry-point:
generates lib code
Expand Down Expand Up @@ -349,6 +368,8 @@ def generate_adapters(path, section, namespace, tags, version, specs, meta):

loc = 0
loc += _mako_null_adapter_cpp(dstpath, namespace, tags, version, specs, meta)
loc += _mako_linker_scripts(dstpath, "map", namespace, tags, version, specs, meta)
loc += _mako_linker_scripts(dstpath, "def", namespace, tags, version, specs, meta)
print("Generated %s lines of code.\n"%loc)

"""
Expand Down
11 changes: 11 additions & 0 deletions scripts/templates/adapter.def.in.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<%!
import re
from templates import helper as th
%><%
n=namespace
%>\
LIBRARY @TARGET_LIBNAME@
EXPORTS
%for tbl in th.get_pfntables(specs, meta, n, tags):
${tbl['export']['name']}
%endfor
14 changes: 14 additions & 0 deletions scripts/templates/adapter.map.in.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<%!
import re
from templates import helper as th
%><%
n=namespace
%>\
@TARGET_LIBNAME@ {
global:
%for tbl in th.get_pfntables(specs, meta, n, tags):
${tbl['export']['name']};
%endfor
local:
*;
};
6 changes: 3 additions & 3 deletions source/adapters/adapter.def.in
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
LIBRARY @TARGET_LIBNAME@
EXPORTS
urGetGlobalProcAddrTable
urGetBindlessImagesExpProcAddrTable
urGetCommandBufferExpProcAddrTable
urGetContextProcAddrTable
urGetDeviceProcAddrTable
urGetEnqueueProcAddrTable
urGetEventProcAddrTable
urGetGlobalProcAddrTable
urGetKernelProcAddrTable
urGetMemProcAddrTable
urGetPhysicalMemProcAddrTable
urGetPlatformProcAddrTable
urGetProgramProcAddrTable
urGetQueueProcAddrTable
urGetSamplerProcAddrTable
urGetUSMProcAddrTable
urGetUSMExpProcAddrTable
urGetUsmP2PExpProcAddrTable
urGetUSMProcAddrTable
urGetVirtualMemProcAddrTable
urGetDeviceProcAddrTable
6 changes: 3 additions & 3 deletions source/adapters/adapter.map.in
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
@TARGET_LIBNAME@ {
global:
urGetGlobalProcAddrTable;
urGetBindlessImagesExpProcAddrTable;
urGetCommandBufferExpProcAddrTable;
urGetContextProcAddrTable;
urGetDeviceProcAddrTable;
urGetEnqueueProcAddrTable;
urGetEventProcAddrTable;
urGetGlobalProcAddrTable;
urGetKernelProcAddrTable;
urGetMemProcAddrTable;
urGetPhysicalMemProcAddrTable;
urGetPlatformProcAddrTable;
urGetProgramProcAddrTable;
urGetQueueProcAddrTable;
urGetSamplerProcAddrTable;
urGetUSMProcAddrTable;
urGetUSMExpProcAddrTable;
urGetUsmP2PExpProcAddrTable;
urGetUSMProcAddrTable;
urGetVirtualMemProcAddrTable;
urGetDeviceProcAddrTable;
local:
*;
};
2 changes: 0 additions & 2 deletions source/adapters/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

set(CUDA_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "CUDA adapter directory")

set(TARGET_NAME ur_adapter_cuda)

add_ur_adapter(${TARGET_NAME}
Expand Down
8 changes: 2 additions & 6 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,12 +501,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(
static_cast<uint64_t>(hDevice->getMaxChosenLocalMem()));
} else {
int LocalMemSize = 0;
UR_CHECK_ERROR(cuDeviceGetAttribute(
&LocalMemSize, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
hDevice->get()));
detail::ur::assertion(LocalMemSize >= 0);
return ReturnValue(static_cast<uint64_t>(LocalMemSize));
return ReturnValue(
static_cast<uint64_t>(hDevice->getMaxCapacityLocalMem()));
}
}
case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: {
Expand Down
6 changes: 3 additions & 3 deletions source/adapters/cuda/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ struct ur_device_handle_t_ {
UR_CHECK_ERROR(cuDeviceGetAttribute(
&MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
cuDevice));
UR_CHECK_ERROR(cuDeviceGetAttribute(
&MaxCapacityLocalMem,
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice));

// Set local mem max size if env var is present
static const char *LocalMemSizePtrUR =
Expand All @@ -56,9 +59,6 @@ struct ur_device_handle_t_ {
: (LocalMemSizePtrPI ? LocalMemSizePtrPI : nullptr);

if (LocalMemSizePtr) {
cuDeviceGetAttribute(
&MaxCapacityLocalMem,
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice);
MaxChosenLocalMem = std::atoi(LocalMemSizePtr);
MaxLocalMemSizeChosen = true;
}
Expand Down
19 changes: 12 additions & 7 deletions source/adapters/cuda/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,9 +284,15 @@ setKernelParams(const ur_context_handle_t Context,
CudaImplicitOffset);
}

if (Context->getDevice()->maxLocalMemSizeChosen()) {
auto Device = Context->getDevice();
if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) {
setErrorMessage("Excessive allocation of local memory on the device",
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}

if (Device->maxLocalMemSizeChosen()) {
// Set up local memory requirements for kernel.
auto Device = Context->getDevice();
if (Device->getMaxChosenLocalMem() < 0) {
bool EnvVarHasURPrefix =
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr;
Expand All @@ -297,11 +303,6 @@ setKernelParams(const ur_context_handle_t Context,
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}
if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) {
setErrorMessage("Too much local memory allocated for device",
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
}
if (LocalSize > static_cast<uint32_t>(Device->getMaxChosenLocalMem())) {
bool EnvVarHasURPrefix =
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr;
Expand All @@ -319,6 +320,10 @@ setKernelParams(const ur_context_handle_t Context,
UR_CHECK_ERROR(cuFuncSetAttribute(
CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
Device->getMaxChosenLocalMem()));

} else {
UR_CHECK_ERROR(cuFuncSetAttribute(
CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, LocalSize));
}

} catch (ur_result_t Err) {
Expand Down
2 changes: 0 additions & 2 deletions source/adapters/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

set(HIP_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "HIP adapter directory")

set(TARGET_NAME ur_adapter_hip)

# Set default UR HIP platform to AMD
Expand Down
16 changes: 10 additions & 6 deletions source/adapters/opencl/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,29 @@

// State behind the adapter handle that urAdapterGet hands out to callers.
struct ur_adapter_handle_t_ {
// Reference count for the adapter, starting at zero. urAdapterGet increments
// it and urAdapterRelease decrements it.
std::atomic<uint32_t> RefCount = 0;
// Locked by urAdapterGet and urAdapterRelease around the RefCount update and
// the matching creation or reset of cl_ext::ExtFuncPtrCache.
std::mutex Mutex;
};

ur_adapter_handle_t_ adapter{};

UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t,
ur_loader_config_handle_t) {
cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT();
return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) {
if (cl_ext::ExtFuncPtrCache) {
delete cl_ext::ExtFuncPtrCache;
cl_ext::ExtFuncPtrCache = nullptr;
}
return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL
urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters,
uint32_t *pNumAdapters) {
if (NumEntries > 0 && phAdapters) {
std::lock_guard<std::mutex> Lock{adapter.Mutex};
if (adapter.RefCount++ == 0) {
cl_ext::ExtFuncPtrCache = std::make_unique<cl_ext::ExtFuncPtrCacheT>();
}

*phAdapters = &adapter;
}

Expand All @@ -50,7 +51,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
}

UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
--adapter.RefCount;
std::lock_guard<std::mutex> Lock{adapter.Mutex};
if (--adapter.RefCount == 0) {
cl_ext::ExtFuncPtrCache.reset();
}
return UR_RESULT_SUCCESS;
}

Expand Down
2 changes: 1 addition & 1 deletion source/adapters/opencl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ struct ExtFuncPtrCacheT {
// piTeardown to avoid issues with static destruction order (a user application
// might have static objects that indirectly access this cache in their
// destructor).
inline ExtFuncPtrCacheT *ExtFuncPtrCache;
inline std::unique_ptr<ExtFuncPtrCacheT> ExtFuncPtrCache;

// USM helper function to get an extension function pointer
template <typename T>
Expand Down
8 changes: 4 additions & 4 deletions source/adapters/opencl/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe(
return mapCLErrorToUR(CLErr);
}

clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr;
cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr;
ur_result_t RetVal =
cl_ext::getExtFuncFromContext<clEnqueueReadHostPipeINTEL_fn>(
cl_ext::getExtFuncFromContext<cl_ext::clEnqueueReadHostPipeINTEL_fn>(
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache,
cl_ext::EnqueueReadHostPipeName, &FuncPtr);

Expand Down Expand Up @@ -382,9 +382,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe(
return mapCLErrorToUR(CLErr);
}

clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr;
cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr;
ur_result_t RetVal =
cl_ext::getExtFuncFromContext<clEnqueueWriteHostPipeINTEL_fn>(
cl_ext::getExtFuncFromContext<cl_ext::clEnqueueWriteHostPipeINTEL_fn>(
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache,
cl_ext::EnqueueWriteHostPipeName, &FuncPtr);

Expand Down
2 changes: 2 additions & 0 deletions source/adapters/opencl/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
//===----------------------------------------------------------------------===//
#include "common.hpp"

#include <memory>

UR_APIEXPORT ur_result_t UR_APICALL
urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName,
ur_kernel_handle_t *phKernel) {
Expand Down
58 changes: 48 additions & 10 deletions source/adapters/opencl/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,20 +316,58 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants(
CL_RETURN_ON_FAILURE(clGetProgramInfo(CLProg, CL_PROGRAM_CONTEXT, sizeof(Ctx),
&Ctx, &RetSize));

cl_ext::clSetProgramSpecializationConstant_fn F = nullptr;
const ur_result_t URResult = cl_ext::getExtFuncFromContext<decltype(F)>(
Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache,
cl_ext::SetProgramSpecializationConstantName, &F);
std::unique_ptr<std::vector<cl_device_id>> DevicesInCtx;
cl_adapter::getDevicesFromContext(cl_adapter::cast<ur_context_handle_t>(Ctx),
DevicesInCtx);

if (URResult != UR_RESULT_SUCCESS) {
return URResult;
}
cl_platform_id CurPlatform;
clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM,
sizeof(cl_platform_id), &CurPlatform, nullptr);

oclv::OpenCLVersion PlatVer;
cl_adapter::getPlatformVersion(CurPlatform, PlatVer);

bool UseExtensionLookup = false;
if (PlatVer < oclv::V2_2) {
UseExtensionLookup = true;
} else {
for (cl_device_id Dev : *DevicesInCtx) {
oclv::OpenCLVersion DevVer;

for (uint32_t i = 0; i < count; ++i) {
CL_RETURN_ON_FAILURE(F(CLProg, pSpecConstants[i].id, pSpecConstants[i].size,
pSpecConstants[i].pValue));
cl_adapter::getDeviceVersion(Dev, DevVer);

if (DevVer < oclv::V2_2) {
UseExtensionLookup = true;
break;
}
}
}

if (UseExtensionLookup == false) {
for (uint32_t i = 0; i < count; ++i) {
CL_RETURN_ON_FAILURE(clSetProgramSpecializationConstant(
CLProg, pSpecConstants[i].id, pSpecConstants[i].size,
pSpecConstants[i].pValue));
}
} else {
cl_ext::clSetProgramSpecializationConstant_fn
SetProgramSpecializationConstant = nullptr;
const ur_result_t URResult = cl_ext::getExtFuncFromContext<
decltype(SetProgramSpecializationConstant)>(
Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache,
cl_ext::SetProgramSpecializationConstantName,
&SetProgramSpecializationConstant);

if (URResult != UR_RESULT_SUCCESS) {
return URResult;
}

for (uint32_t i = 0; i < count; ++i) {
CL_RETURN_ON_FAILURE(SetProgramSpecializationConstant(
CLProg, pSpecConstants[i].id, pSpecConstants[i].size,
pSpecConstants[i].pValue));
}
}
return UR_RESULT_SUCCESS;
}

Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ enable_testing()

add_subdirectory(python)
add_subdirectory(loader)
add_subdirectory(adapters)
add_subdirectory(conformance)
add_subdirectory(unified_malloc_framework)
add_subdirectory(usm)
Expand Down
Loading

0 comments on commit 2af233a

Please sign in to comment.