diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b667ce6860..0efd392e05 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -6,3 +6,11 @@ source/adapters/level_zero @oneapi-src/unified-runtime-level-zero-write # CUDA and HIP adapters source/adapters/cuda @oneapi-src/unified-runtime-cuda-write source/adapters/hip @oneapi-src/unified-runtime-hip-write + +# OpenCL adapter +source/adapters/opencl @oneapi-src/unified-runtime-opencl-write + +# Command-buffer experimental feature +source/adapters/**/command_buffer.* @oneapi-src/unified-runtime-command-buffer-write +scripts/core/EXP-COMMAND-BUFFER.rst @oneapi-src/unified-runtime-command-buffer-write +scripts/core/exp-command-buffer.yml @oneapi-src/unified-runtime-command-buffer-write diff --git a/source/adapters/CMakeLists.txt b/source/adapters/CMakeLists.txt index 58feef056d..14b6130efa 100644 --- a/source/adapters/CMakeLists.txt +++ b/source/adapters/CMakeLists.txt @@ -45,8 +45,5 @@ if(UR_BUILD_ADAPTER_HIP) endif() if(UR_BUILD_ADAPTER_OPENCL) - # Temporarily fetch the opencl adapter from a fork until the PR has been merged. 
- set(SYCL_ADAPTER_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/opencl") - FetchSource(https://github.com/fabiomestre/llvm.git opencl_adapter_unofficial "sycl/plugins/unified_runtime/ur" ${SYCL_ADAPTER_DIR}) add_subdirectory(opencl) endif() diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index 9779482e4b..8298d513d8 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -108,6 +108,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { if (hContext->decrementReferenceCount() == 0) { + hContext->invokeExtendedDeleters(); delete hContext; } return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index d75f3872b1..d06a0353e4 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -192,8 +192,9 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); - return USMAllocationMakeResident(USMDeviceAllocationForceResidency, Context, - Device, *ResultPtr, Size); + USMAllocationMakeResident(USMDeviceAllocationForceResidency, Context, Device, + *ResultPtr, Size); + return UR_RESULT_SUCCESS; } static ur_result_t USMSharedAllocImpl(void **ResultPtr, @@ -224,9 +225,11 @@ static ur_result_t USMSharedAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); + USMAllocationMakeResident(USMSharedAllocationForceResidency, Context, Device, + *ResultPtr, Size); + // TODO: Handle PI_MEM_ALLOC_DEVICE_READ_ONLY. 
- return USMAllocationMakeResident(USMSharedAllocationForceResidency, Context, - Device, *ResultPtr, Size); + return UR_RESULT_SUCCESS; } static ur_result_t USMHostAllocImpl(void **ResultPtr, @@ -244,8 +247,9 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, reinterpret_cast(*ResultPtr) % Alignment == 0, UR_RESULT_ERROR_INVALID_VALUE); - return USMAllocationMakeResident(USMHostAllocationForceResidency, Context, - nullptr, *ResultPtr, Size); + USMAllocationMakeResident(USMHostAllocationForceResidency, Context, nullptr, + *ResultPtr, Size); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( diff --git a/source/adapters/opencl/.clang-format b/source/adapters/opencl/.clang-format new file mode 100644 index 0000000000..c8daebc205 --- /dev/null +++ b/source/adapters/opencl/.clang-format @@ -0,0 +1,4 @@ +--- +Language: Cpp +BasedOnStyle: LLVM +... diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index da9c988552..dc43a68ffa 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -3,37 +3,37 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -set(OPENCL_DIR "${SYCL_ADAPTER_DIR}/sycl/plugins/unified_runtime/ur/adapters/opencl" CACHE PATH "OpenCL adapter directory") +set(OPENCL_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "OpenCL adapter directory") set(TARGET_NAME ur_adapter_opencl) add_ur_adapter(${TARGET_NAME} SHARED - ${OPENCL_DIR}/ur_interface_loader.cpp - ${OPENCL_DIR}/adapter.hpp - ${OPENCL_DIR}/adapter.cpp - ${OPENCL_DIR}/command_buffer.hpp - ${OPENCL_DIR}/command_buffer.cpp - ${OPENCL_DIR}/common.hpp - ${OPENCL_DIR}/common.cpp - ${OPENCL_DIR}/context.cpp - ${OPENCL_DIR}/context.hpp - ${OPENCL_DIR}/device.cpp - ${OPENCL_DIR}/device.hpp - ${OPENCL_DIR}/enqueue.cpp - ${OPENCL_DIR}/event.cpp - ${OPENCL_DIR}/image.cpp - ${OPENCL_DIR}/kernel.cpp - ${OPENCL_DIR}/memory.cpp - ${OPENCL_DIR}/platform.cpp - 
${OPENCL_DIR}/platform.hpp - ${OPENCL_DIR}/program.cpp - ${OPENCL_DIR}/queue.cpp - ${OPENCL_DIR}/sampler.cpp - ${OPENCL_DIR}/usm.cpp - ${OPENCL_DIR}/usm_p2p.cpp - ${OPENCL_DIR}/../../ur.cpp - ${OPENCL_DIR}/../../ur.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/context.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/context.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/device.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/enqueue.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.hpp ) set_target_properties(${TARGET_NAME} PROPERTIES @@ -43,44 +43,27 @@ set_target_properties(${TARGET_NAME} PROPERTIES find_package(Threads REQUIRED) -if (NOT DEFINED OpenCL_LIBRARY OR NOT DEFINED OpenCL_INCLUDE_DIR) - message(WARNING "OpenCL_LIBRARY and OpenCL_INCLUDE_DIR are not set. Using find_package() to find an OpenCL installation in the system.") +# The OpenCL target can be set manually on upstream cmake to avoid using find_package(). 
+if (NOT UR_OPENCL_ICD_LOADER_LIBRARY) find_package(OpenCL REQUIRED) + message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}") + message(STATUS "OpenCL_INCLUDE_DIR: ${OpenCL_INCLUDE_DIR}") + set(UR_OPENCL_ICD_LOADER_LIBRARY OpenCL::OpenCL) endif() -message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}") -message(STATUS "OpenCL_INCLUDE_DIR: ${OpenCL_INCLUDE_DIR}") - # Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION. # Define all symbols up to OpenCL 3.0. -target_compile_definitions(ur_adapter_opencl PRIVATE CL_TARGET_OPENCL_VERSION=300) - -# Make imported library global to use it within the project. -add_library(OpenCL-ICD SHARED IMPORTED GLOBAL) - -if (WIN32) - set_target_properties( - OpenCL-ICD PROPERTIES - IMPORTED_IMPLIB ${OpenCL_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${OpenCL_INCLUDE_DIR} - ) -else() - set_target_properties( - OpenCL-ICD PROPERTIES - IMPORTED_LOCATION ${OpenCL_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${OpenCL_INCLUDE_DIR} - ) -endif() +target_compile_definitions(ur_adapter_opencl PRIVATE CL_TARGET_OPENCL_VERSION=300 CL_USE_DEPRECATED_OPENCL_1_2_APIS) target_link_libraries(${TARGET_NAME} PRIVATE ${PROJECT_NAME}::headers ${PROJECT_NAME}::common ${PROJECT_NAME}::unified_malloc_framework Threads::Threads - OpenCL-ICD + ${UR_OPENCL_ICD_LOADER_LIBRARY} ) target_include_directories(${TARGET_NAME} PRIVATE - ${OPENCL_DIR}/../../../ + "${CMAKE_CURRENT_SOURCE_DIR}/../../" ${OpenCL_INCLUDE_DIR} ) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp new file mode 100644 index 0000000000..10713b9ff9 --- /dev/null +++ b/source/adapters/opencl/adapter.cpp @@ -0,0 +1,82 @@ +//===-------------- adapter.cpp - OpenCL Adapter ---------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" + +struct ur_adapter_handle_t_ { + std::atomic<uint32_t> RefCount = 0; +}; + +ur_adapter_handle_t_ adapter{}; + +UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t, + ur_loader_config_handle_t) { + cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) { + if (cl_ext::ExtFuncPtrCache) { + delete cl_ext::ExtFuncPtrCache; + cl_ext::ExtFuncPtrCache = nullptr; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, + uint32_t *pNumAdapters) { + if (NumEntries > 0 && phAdapters) { + *phAdapters = &adapter; + } + + if (pNumAdapters) { + *pNumAdapters = 1; + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { + ++adapter.RefCount; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { + --adapter.RefCount; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( + ur_adapter_handle_t, const char **ppMessage, int32_t *pError) { + *ppMessage = cl_adapter::ErrorMessage; + *pError = cl_adapter::ErrorMessageCode; + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, + ur_adapter_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_ADAPTER_INFO_BACKEND: + return ReturnValue(UR_ADAPTER_BACKEND_OPENCL); + case UR_ADAPTER_INFO_REFERENCE_COUNT: + return ReturnValue(adapter.RefCount.load()); + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + return UR_RESULT_SUCCESS; +} diff
--git a/source/adapters/opencl/adapter.hpp b/source/adapters/opencl/adapter.hpp new file mode 100644 index 0000000000..27a45b0af8 --- /dev/null +++ b/source/adapters/opencl/adapter.hpp @@ -0,0 +1,13 @@ +//===-------------- adapter.hpp - OpenCL Adapter ---------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +struct ur_adapter_handle_t_; + +extern ur_adapter_handle_t_ adapter; diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp new file mode 100644 index 0000000000..121a991cbd --- /dev/null +++ b/source/adapters/opencl/command_buffer.cpp @@ -0,0 +1,198 @@ +//===--------- command_buffer.cpp - OpenCL Adapter ---------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "command_buffer.hpp" +#include "common.hpp" + +/// Stub implementations of UR experimental feature command-buffers + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + [[maybe_unused]] ur_exp_command_buffer_handle_t *phCommandBuffer) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_kernel_handle_t hKernel, + [[maybe_unused]] uint32_t workDim, + [[maybe_unused]] const size_t *pGlobalWorkOffset, + [[maybe_unused]] const size_t *pGlobalWorkSize, + [[maybe_unused]] const size_t *pLocalWorkSize, + 
[[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc, + [[maybe_unused]] size_t size, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hSrcMem, + [[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset, + [[maybe_unused]] size_t dstOffset, [[maybe_unused]] size_t size, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hSrcMem, + [[maybe_unused]] ur_mem_handle_t hDstMem, + [[maybe_unused]] ur_rect_offset_t srcOrigin, + [[maybe_unused]] ur_rect_offset_t dstOrigin, + [[maybe_unused]] 
ur_rect_region_t region, + [[maybe_unused]] size_t srcRowPitch, [[maybe_unused]] size_t srcSlicePitch, + [[maybe_unused]] size_t dstRowPitch, [[maybe_unused]] size_t dstSlicePitch, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, + [[maybe_unused]] size_t size, [[maybe_unused]] const void *pSrc, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, [[maybe_unused]] size_t offset, + [[maybe_unused]] size_t size, [[maybe_unused]] void *pDst, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] 
ur_mem_handle_t hBuffer, + [[maybe_unused]] ur_rect_offset_t bufferOffset, + [[maybe_unused]] ur_rect_offset_t hostOffset, + [[maybe_unused]] ur_rect_region_t region, + [[maybe_unused]] size_t bufferRowPitch, + [[maybe_unused]] size_t bufferSlicePitch, + [[maybe_unused]] size_t hostRowPitch, + [[maybe_unused]] size_t hostSlicePitch, [[maybe_unused]] void *pSrc, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_mem_handle_t hBuffer, + [[maybe_unused]] ur_rect_offset_t bufferOffset, + [[maybe_unused]] ur_rect_offset_t hostOffset, + [[maybe_unused]] ur_rect_region_t region, + [[maybe_unused]] size_t bufferRowPitch, + [[maybe_unused]] size_t bufferSlicePitch, + [[maybe_unused]] size_t hostRowPitch, + [[maybe_unused]] size_t hostSlicePitch, [[maybe_unused]] void *pDst, + [[maybe_unused]] uint32_t numSyncPointsInWaitList, + [[maybe_unused]] const ur_exp_command_buffer_sync_point_t + *pSyncPointWaitList, + [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_adapter::die("Experimental Command-buffer feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( + [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + + cl_adapter::die("Experimental Command-buffer 
feature is not " + "implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp new file mode 100644 index 0000000000..7ab145c53d --- /dev/null +++ b/source/adapters/opencl/command_buffer.hpp @@ -0,0 +1,15 @@ +//===--------- command_buffer.hpp - OpenCL Adapter ---------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +/// Stub implementation of command-buffers for OpenCL + +struct ur_exp_command_buffer_handle_t_ {}; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp new file mode 100644 index 0000000000..2b0e7b6a27 --- /dev/null +++ b/source/adapters/opencl/common.cpp @@ -0,0 +1,82 @@ +//===--------- common.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" + +namespace cl_adapter { + +/* Global variables for urPlatformGetLastError() */ +thread_local int32_t ErrorMessageCode = 0; +thread_local char ErrorMessage[MaxMessageSize]; + +[[maybe_unused]] void setErrorMessage(const char *Message, + ur_result_t ErrorCode) { + assert(strlen(Message) < cl_adapter::MaxMessageSize); + strcpy(cl_adapter::ErrorMessage, Message); + ErrorMessageCode = ErrorCode; +} +} // namespace cl_adapter + +ur_result_t mapCLErrorToUR(cl_int Result) { + switch (Result) { + case CL_SUCCESS: + return UR_RESULT_SUCCESS; + case CL_OUT_OF_HOST_MEMORY: + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + case CL_INVALID_VALUE: + case CL_INVALID_BUILD_OPTIONS: + return UR_RESULT_ERROR_INVALID_VALUE; + case CL_INVALID_PLATFORM: + return UR_RESULT_ERROR_INVALID_PLATFORM; + case CL_DEVICE_NOT_FOUND: + return UR_RESULT_ERROR_DEVICE_NOT_FOUND; + case CL_INVALID_OPERATION: + return UR_RESULT_ERROR_INVALID_OPERATION; + case CL_INVALID_ARG_VALUE: + return UR_RESULT_ERROR_INVALID_ARGUMENT; + case CL_INVALID_EVENT: + return UR_RESULT_ERROR_INVALID_EVENT; + case CL_INVALID_EVENT_WAIT_LIST: + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + case CL_INVALID_BINARY: + return UR_RESULT_ERROR_INVALID_BINARY; + case CL_INVALID_KERNEL_NAME: + return UR_RESULT_ERROR_INVALID_KERNEL_NAME; + case CL_BUILD_PROGRAM_FAILURE: + return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE; + case CL_INVALID_WORK_GROUP_SIZE: + return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; + case CL_INVALID_WORK_ITEM_SIZE: + return UR_RESULT_ERROR_INVALID_WORK_ITEM_SIZE; + case CL_INVALID_WORK_DIMENSION: + return UR_RESULT_ERROR_INVALID_WORK_DIMENSION; + case CL_OUT_OF_RESOURCES: + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + case CL_INVALID_MEM_OBJECT: + return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + default: + return UR_RESULT_ERROR_UNKNOWN;
+ } +} + +void cl_adapter::die(const char *Message) { + std::cerr << "ur_die: " << Message << "\n"; + std::terminate(); +} + +/// Common API for getting the native handle of a UR object +/// +/// \param URObj is the UR object to get the native handle of +/// \param NativeHandle is a pointer to be set to the native handle +/// +/// UR_RESULT_SUCCESS +ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { + *NativeHandle = reinterpret_cast(URObj); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp new file mode 100644 index 0000000000..f78710d0df --- /dev/null +++ b/source/adapters/opencl/common.hpp @@ -0,0 +1,327 @@ +//===--------- common.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +/** + * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically map + * the OpenCL error to UR and return from the current function. + */ +#define CL_RETURN_ON_FAILURE(clCall) \ + if (const cl_int cl_result_macro = clCall; cl_result_macro != CL_SUCCESS) { \ + return mapCLErrorToUR(cl_result_macro); \ + } + +/** + * Call an UR API and, if the result is not UR_RESULT_SUCCESS, automatically + * return from the current function. + */ +#define UR_RETURN_ON_FAILURE(urCall) \ + if (const ur_result_t ur_result_macro = urCall; \ + ur_result_macro != UR_RESULT_SUCCESS) { \ + return ur_result_macro; \ + } + +/** + * Call an OpenCL API and, if the result is not CL_SUCCESS, automatically return + * from the current function and set the pointer `outPtr` to nullptr. 
The OpenCL + * error is mapped to UR + */ +#define CL_RETURN_ON_FAILURE_AND_SET_NULL(clCall, outPtr) \ + if (const cl_int cl_result_macro = clCall; cl_result_macro != CL_SUCCESS) { \ + if (outPtr != nullptr) { \ + *outPtr = nullptr; \ + } \ + return mapCLErrorToUR(cl_result_macro); \ + } + +namespace oclv { +class OpenCLVersion { +protected: + unsigned int OCLMajor; + unsigned int OCLMinor; + +public: + OpenCLVersion() : OCLMajor(0), OCLMinor(0) {} + + OpenCLVersion(unsigned int OclMajor, unsigned int OclMinor) + : OCLMajor(OclMajor), OCLMinor(OclMinor) { + if (!isValid()) { + OCLMajor = OCLMinor = 0; + } + } + + OpenCLVersion(const char *Version) : OpenCLVersion(std::string(Version)) {} + + OpenCLVersion(const std::string &Version) : OCLMajor(0), OCLMinor(0) { + /* The OpenCL specification defines the full version string as + * 'OpenCL' for platforms and as + * 'OpenCL' for devices. + */ + std::regex Rx("OpenCL ([0-9]+)\\.([0-9]+)"); + std::smatch Match; + + if (std::regex_search(Version, Match, Rx) && (Match.size() == 3)) { + OCLMajor = strtoul(Match[1].str().c_str(), nullptr, 10); + OCLMinor = strtoul(Match[2].str().c_str(), nullptr, 10); + + if (!isValid()) { + OCLMajor = OCLMinor = 0; + } + } + } + + bool operator==(const OpenCLVersion &V) const { + return OCLMajor == V.OCLMajor && OCLMinor == V.OCLMinor; + } + + bool operator!=(const OpenCLVersion &V) const { return !(*this == V); } + + bool operator<(const OpenCLVersion &V) const { + if (OCLMajor == V.OCLMajor) + return OCLMinor < V.OCLMinor; + + return OCLMajor < V.OCLMajor; + } + + bool operator>(const OpenCLVersion &V) const { return V < *this; } + + bool operator<=(const OpenCLVersion &V) const { + return (*this < V) || (*this == V); + } + + bool operator>=(const OpenCLVersion &V) const { + return (*this > V) || (*this == V); + } + + bool isValid() const { + switch (OCLMajor) { + case 0: + return false; + case 1: + case 2: + return OCLMinor <= 2; + case UINT_MAX: + return false; + default: + return OCLMinor != UINT_MAX;
+ } + } + + unsigned int getMajor() const { return OCLMajor; } + unsigned int getMinor() const { return OCLMinor; } +}; + +inline const OpenCLVersion V1_0(1, 0); +inline const OpenCLVersion V1_1(1, 1); +inline const OpenCLVersion V1_2(1, 2); +inline const OpenCLVersion V2_0(2, 0); +inline const OpenCLVersion V2_1(2, 1); +inline const OpenCLVersion V2_2(2, 2); +inline const OpenCLVersion V3_0(3, 0); + +} // namespace oclv + +namespace cl_adapter { +constexpr size_t MaxMessageSize = 256; +extern thread_local int32_t ErrorMessageCode; +extern thread_local char ErrorMessage[MaxMessageSize]; + +// Utility function for setting a message and warning +[[maybe_unused]] void setErrorMessage(const char *Message, + ur_result_t ErrorCode); + +[[noreturn]] void die(const char *Message); + +template <class To, class From> To cast(From Value) { + + if constexpr (std::is_pointer_v<From>) { + static_assert(std::is_pointer_v<From> == std::is_pointer_v<To>, + "Cast failed pointer check"); + return reinterpret_cast<To>(Value); + } else { + static_assert(sizeof(From) == sizeof(To), "Cast failed size check"); + static_assert(std::is_signed_v<From> == std::is_signed_v<To>, + "Cast failed sign check"); + return static_cast<To>(Value); + } +} +} // namespace cl_adapter + +namespace cl_ext { +// Older versions of GCC don't like "const" here +#if defined(__GNUC__) && (__GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ < 2)) +#define CONSTFIX constexpr +#else +#define CONSTFIX const +#endif + +// Names of USM functions that are queried from OpenCL +CONSTFIX char HostMemAllocName[] = "clHostMemAllocINTEL"; +CONSTFIX char DeviceMemAllocName[] = "clDeviceMemAllocINTEL"; +CONSTFIX char SharedMemAllocName[] = "clSharedMemAllocINTEL"; +CONSTFIX char MemBlockingFreeName[] = "clMemBlockingFreeINTEL"; +CONSTFIX char CreateBufferWithPropertiesName[] = + "clCreateBufferWithPropertiesINTEL"; +CONSTFIX char SetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; +CONSTFIX char EnqueueMemFillName[] = "clEnqueueMemFillINTEL"; +CONSTFIX char
EnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; +CONSTFIX char GetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; +CONSTFIX char SetProgramSpecializationConstantName[] = + "clSetProgramSpecializationConstant"; +CONSTFIX char GetDeviceFunctionPointerName[] = + "clGetDeviceFunctionPointerINTEL"; +CONSTFIX char EnqueueWriteGlobalVariableName[] = + "clEnqueueWriteGlobalVariableINTEL"; +CONSTFIX char EnqueueReadGlobalVariableName[] = + "clEnqueueReadGlobalVariableINTEL"; +// Names of host pipe functions queried from OpenCL +CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; +CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; + +#undef CONSTFIX + +using clGetDeviceFunctionPointer_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, + const char *FuncName, cl_ulong *ret_ptr); + +using clEnqueueWriteGlobalVariable_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, + size_t, size_t, const void *, cl_uint, const cl_event *, + cl_event *); + +using clEnqueueReadGlobalVariable_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, + size_t, size_t, void *, cl_uint, const cl_event *, + cl_event *); + +using clSetProgramSpecializationConstant_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value); + +using clEnqueueReadHostPipeINTEL_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, + const char *pipe_symbol, cl_bool blocking, void *ptr, + size_t size, cl_uint num_events_in_waitlist, + const cl_event *events_waitlist, cl_event *event); + +using clEnqueueWriteHostPipeINTEL_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, + const char *pipe_symbol, cl_bool blocking, + const void *ptr, size_t size, + cl_uint num_events_in_waitlist, + const cl_event *events_waitlist, cl_event *event); + +template 
struct FuncPtrCache { + std::map Map; + std::mutex Mutex; +}; + +// FIXME: There's currently no mechanism for cleaning up this cache, meaning +// that it is invalidated whenever a context is destroyed. This could lead to +// reusing an invalid function pointer if another context happens to have the +// same native handle. +struct ExtFuncPtrCacheT { + FuncPtrCache clHostMemAllocINTELCache; + FuncPtrCache clDeviceMemAllocINTELCache; + FuncPtrCache clSharedMemAllocINTELCache; + FuncPtrCache clGetDeviceFunctionPointerCache; + FuncPtrCache + clCreateBufferWithPropertiesINTELCache; + FuncPtrCache clMemBlockingFreeINTELCache; + FuncPtrCache + clSetKernelArgMemPointerINTELCache; + FuncPtrCache clEnqueueMemFillINTELCache; + FuncPtrCache clEnqueueMemcpyINTELCache; + FuncPtrCache clGetMemAllocInfoINTELCache; + FuncPtrCache + clEnqueueWriteGlobalVariableCache; + FuncPtrCache clEnqueueReadGlobalVariableCache; + FuncPtrCache clEnqueueReadHostPipeINTELCache; + FuncPtrCache clEnqueueWriteHostPipeINTELCache; + FuncPtrCache + clSetProgramSpecializationConstantCache; +}; +// A raw pointer is used here since the lifetime of this map has to be tied to +// piTeardown to avoid issues with static destruction order (a user application +// might have static objects that indirectly access this cache in their +// destructor). +inline ExtFuncPtrCacheT *ExtFuncPtrCache; + +// USM helper function to get an extension function pointer +template +static ur_result_t getExtFuncFromContext(cl_context Context, + FuncPtrCache &FPtrCache, + const char *FuncName, T *Fptr) { + // TODO + // Potentially redo caching as UR interface changes. + // if cached, return cached FuncPtr + std::lock_guard CacheLock{FPtrCache.Mutex}; + std::map &FPtrMap = FPtrCache.Map; + auto It = FPtrMap.find(Context); + if (It != FPtrMap.end()) { + auto F = It->second; + // if cached that extension is not available return nullptr and + // UR_RESULT_ERROR_INVALID_VALUE + *Fptr = F; + return F ? 
UR_RESULT_SUCCESS : UR_RESULT_ERROR_INVALID_VALUE; + } + + cl_uint DeviceCount; + cl_int RetErr = clGetContextInfo(Context, CL_CONTEXT_NUM_DEVICES, + sizeof(cl_uint), &DeviceCount, nullptr); + + if (RetErr != CL_SUCCESS || DeviceCount < 1) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + std::vector DevicesInCtx(DeviceCount); + RetErr = clGetContextInfo(Context, CL_CONTEXT_DEVICES, + DeviceCount * sizeof(cl_device_id), + DevicesInCtx.data(), nullptr); + + if (RetErr != CL_SUCCESS) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + cl_platform_id CurPlatform; + RetErr = clGetDeviceInfo(DevicesInCtx[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &CurPlatform, nullptr); + + if (RetErr != CL_SUCCESS) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + T FuncPtr = reinterpret_cast( + clGetExtensionFunctionAddressForPlatform(CurPlatform, FuncName)); + + if (!FuncPtr) { + // Cache that the extension is not available + FPtrMap[Context] = nullptr; + return UR_RESULT_ERROR_INVALID_VALUE; + } + + *Fptr = FuncPtr; + FPtrMap[Context] = FuncPtr; + + return UR_RESULT_SUCCESS; +} +} // namespace cl_ext + +ur_result_t mapCLErrorToUR(cl_int Result); + +ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle); diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp new file mode 100644 index 0000000000..16c5999160 --- /dev/null +++ b/source/adapters/opencl/context.cpp @@ -0,0 +1,137 @@ +//===--------- context.cpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "context.hpp" + +ur_result_t cl_adapter::getDevicesFromContext( + ur_context_handle_t hContext, + std::unique_ptr> &DevicesInCtx) { + + cl_uint DeviceCount; + CL_RETURN_ON_FAILURE(clGetContextInfo(cl_adapter::cast(hContext), + CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), + &DeviceCount, nullptr)); + + if (DeviceCount < 1) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + DevicesInCtx = std::make_unique>(DeviceCount); + + CL_RETURN_ON_FAILURE(clGetContextInfo( + cl_adapter::cast(hContext), CL_CONTEXT_DEVICES, + DeviceCount * sizeof(cl_device_id), (*DevicesInCtx).data(), nullptr)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( + uint32_t DeviceCount, const ur_device_handle_t *phDevices, + const ur_context_properties_t *, ur_context_handle_t *phContext) { + + cl_int Ret; + *phContext = cl_adapter::cast( + clCreateContext(nullptr, cl_adapter::cast(DeviceCount), + cl_adapter::cast(phDevices), + nullptr, nullptr, cl_adapter::cast(&Ret))); + + return mapCLErrorToUR(Ret); +} + +static cl_int mapURContextInfoToCL(ur_context_info_t URPropName) { + + cl_int CLPropName; + switch (URPropName) { + case UR_CONTEXT_INFO_NUM_DEVICES: + CLPropName = CL_CONTEXT_NUM_DEVICES; + break; + case UR_CONTEXT_INFO_DEVICES: + CLPropName = CL_CONTEXT_DEVICES; + break; + case UR_CONTEXT_INFO_REFERENCE_COUNT: + CLPropName = CL_CONTEXT_REFERENCE_COUNT; + break; + default: + CLPropName = -1; + } + + return CLPropName; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + const cl_int CLPropName = mapURContextInfoToCL(propName); + + switch (static_cast(propName)) { + /* 2D USM memops are not 
supported. */ + case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: + case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: { + return ReturnValue(false); + } + case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + /* These queries should be dealt with in context_impl.cpp by calling the + * queries of each device separately and building the intersection set. */ + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + case UR_CONTEXT_INFO_NUM_DEVICES: + case UR_CONTEXT_INFO_DEVICES: + case UR_CONTEXT_INFO_REFERENCE_COUNT: { + + CL_RETURN_ON_FAILURE( + clGetContextInfo(cl_adapter::cast(hContext), CLPropName, + propSize, pPropValue, pPropSizeRet)); + return UR_RESULT_SUCCESS; + } + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urContextRelease(ur_context_handle_t hContext) { + + cl_int Ret = clReleaseContext(cl_adapter::cast(hContext)); + return mapCLErrorToUR(Ret); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urContextRetain(ur_context_handle_t hContext) { + + cl_int Ret = clRetainContext(cl_adapter::cast(hContext)); + return mapCLErrorToUR(Ret); +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( + ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { + + *phNativeContext = reinterpret_cast(hContext); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( + ur_native_handle_t hNativeContext, uint32_t, const ur_device_handle_t *, + const ur_context_native_properties_t *, ur_context_handle_t *phContext) { + + *phContext = reinterpret_cast(hNativeContext); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_context_extended_deleter_t pfnDeleter, + [[maybe_unused]] void *pUserData) 
{ + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp new file mode 100644 index 0000000000..5319f68b55 --- /dev/null +++ b/source/adapters/opencl/context.hpp @@ -0,0 +1,18 @@ +//===--------- context.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +namespace cl_adapter { +ur_result_t +getDevicesFromContext(ur_context_handle_t hContext, + std::unique_ptr> &DevicesInCtx); +} diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp new file mode 100644 index 0000000000..3fc6f5d491 --- /dev/null +++ b/source/adapters/opencl/device.cpp @@ -0,0 +1,1112 @@ +//===--------- device.hpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "device.hpp" +#include "common.hpp" +#include "platform.hpp" + +#include + +ur_result_t cl_adapter::getDeviceVersion(cl_device_id Dev, + oclv::OpenCLVersion &Version) { + + size_t DevVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); + + std::string DevVer(DevVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_VERSION, DevVerSize, + DevVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(DevVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_DEVICE; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t cl_adapter::checkDeviceExtensions( + cl_device_id Dev, const std::vector &Exts, bool &Supported) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + Supported = true; + for (const std::string &Ext : Exts) { + if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + break; + } + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, + ur_device_type_t DeviceType, + uint32_t NumEntries, + ur_device_handle_t *phDevices, + uint32_t *pNumDevices) { + + cl_device_type Type; + switch (DeviceType) { + case UR_DEVICE_TYPE_ALL: + Type = CL_DEVICE_TYPE_ALL; + break; + case UR_DEVICE_TYPE_GPU: + Type = CL_DEVICE_TYPE_GPU; + break; + case UR_DEVICE_TYPE_CPU: + Type = CL_DEVICE_TYPE_CPU; + break; + case UR_DEVICE_TYPE_FPGA: + case UR_DEVICE_TYPE_MCA: + case UR_DEVICE_TYPE_VPU: + Type = CL_DEVICE_TYPE_ACCELERATOR; + break; + case UR_DEVICE_TYPE_DEFAULT: + Type = UR_DEVICE_TYPE_DEFAULT; + break; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + cl_int 
Result = clGetDeviceIDs(cl_adapter::cast(hPlatform), + Type, cl_adapter::cast(NumEntries), + cl_adapter::cast(phDevices), + cl_adapter::cast(pNumDevices)); + + // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices + if (Result == CL_DEVICE_NOT_FOUND) { + Result = CL_SUCCESS; + if (pNumDevices) { + *pNumDevices = 0; + } + } + + return mapCLErrorToUR(Result); +} + +static ur_device_fp_capability_flags_t +mapCLDeviceFpConfigToUR(cl_device_fp_config CLValue) { + + ur_device_fp_capability_flags_t URValue = 0; + if (CLValue & CL_FP_DENORM) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_DENORM; + } + if (CLValue & CL_FP_INF_NAN) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN; + } + if (CLValue & CL_FP_ROUND_TO_NEAREST) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST; + } + if (CLValue & CL_FP_ROUND_TO_ZERO) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO; + } + if (CLValue & CL_FP_ROUND_TO_INF) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF; + } + if (CLValue & CL_FP_FMA) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_FMA; + } + if (CLValue & CL_FP_SOFT_FLOAT) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_SOFT_FLOAT; + } + if (CLValue & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) { + URValue |= UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT; + } + + return URValue; +} + +static cl_int mapURDeviceInfoToCL(ur_device_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_DEVICE_INFO_TYPE: + return CL_DEVICE_TYPE; + case UR_DEVICE_INFO_PARENT_DEVICE: + return CL_DEVICE_PARENT_DEVICE; + case UR_DEVICE_INFO_PLATFORM: + return CL_DEVICE_PLATFORM; + case UR_DEVICE_INFO_VENDOR_ID: + return CL_DEVICE_VENDOR_ID; + case UR_DEVICE_INFO_EXTENSIONS: + return CL_DEVICE_EXTENSIONS; + case UR_DEVICE_INFO_NAME: + return CL_DEVICE_NAME; + case UR_DEVICE_INFO_COMPILER_AVAILABLE: + return CL_DEVICE_COMPILER_AVAILABLE; + case UR_DEVICE_INFO_LINKER_AVAILABLE: + return CL_DEVICE_LINKER_AVAILABLE; + case 
UR_DEVICE_INFO_MAX_COMPUTE_UNITS: + return CL_DEVICE_MAX_COMPUTE_UNITS; + case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: + return CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS; + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + return CL_DEVICE_MAX_WORK_GROUP_SIZE; + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: + return CL_DEVICE_MAX_WORK_ITEM_SIZES; + case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: + return CL_DEVICE_MAX_CLOCK_FREQUENCY; + case UR_DEVICE_INFO_ADDRESS_BITS: + return CL_DEVICE_ADDRESS_BITS; + case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: + return CL_DEVICE_MAX_MEM_ALLOC_SIZE; + case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: + return CL_DEVICE_GLOBAL_MEM_SIZE; + case UR_DEVICE_INFO_LOCAL_MEM_SIZE: + return CL_DEVICE_LOCAL_MEM_SIZE; + case UR_DEVICE_INFO_IMAGE_SUPPORTED: + return CL_DEVICE_IMAGE_SUPPORT; + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: + return CL_DEVICE_HOST_UNIFIED_MEMORY; + case UR_DEVICE_INFO_AVAILABLE: + return CL_DEVICE_AVAILABLE; + case UR_DEVICE_INFO_VENDOR: + return CL_DEVICE_VENDOR; + case UR_DEVICE_INFO_DRIVER_VERSION: + return CL_DRIVER_VERSION; + case UR_DEVICE_INFO_VERSION: + return CL_DEVICE_VERSION; + case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: + return CL_DEVICE_PARTITION_MAX_SUB_DEVICES; + case UR_DEVICE_INFO_REFERENCE_COUNT: + return CL_DEVICE_REFERENCE_COUNT; + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: + return CL_DEVICE_PARTITION_PROPERTIES; + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + return CL_DEVICE_PARTITION_AFFINITY_DOMAIN; + case UR_DEVICE_INFO_PARTITION_TYPE: + return CL_DEVICE_PARTITION_TYPE; + case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: + return CL_DEVICE_OPENCL_C_VERSION; + case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: + return CL_DEVICE_PREFERRED_INTEROP_USER_SYNC; + case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: + return CL_DEVICE_PRINTF_BUFFER_SIZE; + case UR_DEVICE_INFO_PROFILE: + return CL_DEVICE_PROFILE; + case UR_DEVICE_INFO_BUILT_IN_KERNELS: + return CL_DEVICE_BUILT_IN_KERNELS; + case UR_DEVICE_INFO_QUEUE_PROPERTIES: + return 
CL_DEVICE_QUEUE_PROPERTIES; + case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: + return CL_DEVICE_QUEUE_ON_HOST_PROPERTIES; + case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: + return CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES; + case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: + return CL_DEVICE_EXECUTION_CAPABILITIES; + case UR_DEVICE_INFO_ENDIAN_LITTLE: + return CL_DEVICE_ENDIAN_LITTLE; + case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: + return CL_DEVICE_ERROR_CORRECTION_SUPPORT; + case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: + return CL_DEVICE_PROFILING_TIMER_RESOLUTION; + case UR_DEVICE_INFO_LOCAL_MEM_TYPE: + return CL_DEVICE_LOCAL_MEM_TYPE; + case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: + return CL_DEVICE_MAX_CONSTANT_ARGS; + case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: + return CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: + return CL_DEVICE_GLOBAL_MEM_CACHE_TYPE; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: + return CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE; + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: + return CL_DEVICE_GLOBAL_MEM_CACHE_SIZE; + case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: + return CL_DEVICE_MAX_PARAMETER_SIZE; + case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: + return CL_DEVICE_MEM_BASE_ADDR_ALIGN; + case UR_DEVICE_INFO_MAX_SAMPLERS: + return CL_DEVICE_MAX_SAMPLERS; + case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: + return CL_DEVICE_MAX_READ_IMAGE_ARGS; + case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: + return CL_DEVICE_MAX_WRITE_IMAGE_ARGS; + case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: + return CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS; + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: + return CL_DEVICE_SINGLE_FP_CONFIG; + case UR_DEVICE_INFO_HALF_FP_CONFIG: + return CL_DEVICE_HALF_FP_CONFIG; + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: + return CL_DEVICE_DOUBLE_FP_CONFIG; + case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: + return CL_DEVICE_IMAGE2D_MAX_WIDTH; + case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: + return CL_DEVICE_IMAGE2D_MAX_HEIGHT; + case 
UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: + return CL_DEVICE_IMAGE3D_MAX_WIDTH; + case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: + return CL_DEVICE_IMAGE3D_MAX_HEIGHT; + case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: + return CL_DEVICE_IMAGE3D_MAX_DEPTH; + case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: + return CL_DEVICE_IMAGE_MAX_BUFFER_SIZE; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_INT; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE; + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + return CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF; + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + return CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF; + case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: + return CL_DEVICE_MAX_NUM_SUB_GROUPS; + case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: + return CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS; + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: + return CL_DEVICE_SUB_GROUP_SIZES_INTEL; + case UR_DEVICE_INFO_IL_VERSION: + 
return CL_DEVICE_IL_VERSION; + case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: + return CL_DEVICE_IMAGE_MAX_ARRAY_SIZE; + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + return CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + return CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + return CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + return CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: + return CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL; + case UR_DEVICE_INFO_IP_VERSION: + return CL_DEVICE_IP_VERSION_INTEL; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, + ur_device_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + const cl_device_info CLPropName = mapURDeviceInfoToCL(propName); + + /* TODO UR: Casting to uint32_t to silence warnings due to some values not + * being part of the enum. Can be removed once all UR_EXT enums are promoted + * to UR */ + switch (static_cast(propName)) { + case UR_DEVICE_INFO_TYPE: { + cl_device_type CLType; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_device_type), &CLType, nullptr)); + + /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not + * enough information in the OpenCL runtime to know exactly which type it + * is. Assuming FPGA for now */ + /* TODO UR: In OpenCL, a device can have multiple types (e.g. CPU and GPU). 
+ * We are potentially losing information by returning only one type */ + ur_device_type_t URDeviceType = UR_DEVICE_TYPE_DEFAULT; + if (CLType & CL_DEVICE_TYPE_CPU) { + URDeviceType = UR_DEVICE_TYPE_CPU; + } else if (CLType & CL_DEVICE_TYPE_GPU) { + URDeviceType = UR_DEVICE_TYPE_GPU; + } else if (CLType & CL_DEVICE_TYPE_ACCELERATOR) { + URDeviceType = UR_DEVICE_TYPE_FPGA; + } + + return ReturnValue(URDeviceType); + } + case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { + oclv::OpenCLVersion Version; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), Version)); + + const std::string Results = std::to_string(Version.getMajor()) + "." + + std::to_string(Version.getMinor()); + return ReturnValue(Results.c_str(), Results.size() + 1); + } + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { + size_t CLSize; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, + nullptr, &CLSize)); + const size_t NProperties = CLSize / sizeof(cl_device_partition_property); + + std::vector CLValue(NProperties); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + CLSize, CLValue.data(), nullptr)); + + /* The OpenCL implementation returns a value of 0 if no properties are + * supported. UR will return a size of 0 for now. 
+ */ + if (pPropSizeRet && CLValue[0] == 0) { + *pPropSizeRet = 0; + return UR_RESULT_SUCCESS; + } + + std::vector URValue{}; + for (size_t i = 0; i < NProperties; ++i) { + if (CLValue[i] != CL_DEVICE_PARTITION_BY_NAMES_INTEL && CLValue[i] != 0) { + URValue.push_back(static_cast(CLValue[i])); + } + } + return ReturnValue(URValue.data(), URValue.size()); + } + case UR_DEVICE_INFO_PARTITION_TYPE: { + + size_t CLSize; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, + nullptr, &CLSize)); + const size_t NProperties = CLSize / sizeof(cl_device_partition_property); + + /* The OpenCL implementation returns either a size of 0 or a value of 0 if + * the device is not a sub-device. UR will return a size of 0 for now. + * TODO Ideally, this could become an error once PI is removed from SYCL RT + */ + if (pPropSizeRet && (CLSize == 0 || NProperties == 1)) { + *pPropSizeRet = 0; + return UR_RESULT_SUCCESS; + } + + auto CLValue = + reinterpret_cast(alloca(CLSize)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + CLSize, CLValue, nullptr)); + + std::vector URValue(NProperties - 1); + + /* OpenCL will always return exactly one partition type followed by one or + * more values. */ + for (uint32_t i = 0; i < URValue.size(); ++i) { + URValue[i].type = static_cast(CLValue[0]); + switch (URValue[i].type) { + case UR_DEVICE_PARTITION_EQUALLY: { + URValue[i].value.equally = CLValue[i + 1]; + break; + } + case UR_DEVICE_PARTITION_BY_COUNTS: { + URValue[i].value.count = CLValue[i + 1]; + break; + } + case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { + URValue[i].value.affinity_domain = CLValue[i + 1]; + break; + } + default: { + return UR_RESULT_ERROR_UNKNOWN; + } + } + } + + return ReturnValue(URValue.data(), URValue.size()); + } + case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D: { + /* Returns the maximum sizes of a work group for each dimension one could + * use to submit a kernel. There is no such query defined in OpenCL. 
So + * we'll return the maximum value. */ + static constexpr uint32_t MaxWorkItemDimensions = 3u; + static constexpr size_t Max = (std::numeric_limits::max)(); + + struct { + size_t sizes[MaxWorkItemDimensions]; + } ReturnSizes; + + ReturnSizes.sizes[0] = Max; + ReturnSizes.sizes[1] = Max; + ReturnSizes.sizes[2] = Max; + return ReturnValue(ReturnSizes); + } + case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { + return ReturnValue(static_cast(1u)); + } + case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: { + /* Corresponding OpenCL query is only available starting with OpenCL 2.1 + * and we have to emulate it on older OpenCL runtimes. */ + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + if (DevVer >= oclv::V2_1) { + cl_uint CLValue; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, + sizeof(cl_uint), &CLValue, nullptr)); + + if (CLValue == 0u) { + /* OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 + * spec says that minimum possible value is 1. */ + return ReturnValue(1u); + } else { + return ReturnValue(static_cast(CLValue)); + } + } else { + /* Otherwise, we can't query anything, because even cl_khr_subgroups + * does not provide similar query. Therefore, simply return minimum + * possible value 1 here. 
*/ + return ReturnValue(1u); + } + } + case UR_DEVICE_INFO_SINGLE_FP_CONFIG: + case UR_DEVICE_INFO_HALF_FP_CONFIG: + case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: { + /* CL type: cl_device_fp_config + * UR type: ur_device_fp_capability_flags_t */ + if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { + bool Supported; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), {"cl_khr_fp16"}, Supported)); + + if (!Supported) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + + cl_device_fp_config CLValue; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_device_fp_config), &CLValue, nullptr)); + + return ReturnValue(mapCLDeviceFpConfigToUR(CLValue)); + } + + case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + /* This query is missing before OpenCL 3.0. Check version and handle + * appropriately */ + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + /* Minimum required capability to be returned. For OpenCL 1.2, this is all + * that is required */ + ur_memory_order_capability_flags_t URCapabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED; + + if (DevVer >= oclv::V3_0) { + /* For OpenCL >=3.0, the query should be implemented */ + cl_device_atomic_capabilities CLCapabilities; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); + + /* Mask operation to only consider atomic_memory_order* capabilities */ + const cl_int Mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | + CL_DEVICE_ATOMIC_ORDER_ACQ_REL | + CL_DEVICE_ATOMIC_ORDER_SEQ_CST; + CLCapabilities &= Mask; + + /* The memory order capabilities are hierarchical, if one is implied, all + * preceding capabilities are implied as well. Especially in the case of + * ACQ_REL. 
*/ + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE; + } + } else if (DevVer >= oclv::V2_0) { + /* For OpenCL 2.x, return all capabilities. + * (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) + */ + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_order_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(URCapabilities)); + } + case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support + * all narrower scopes. At a minimum, each device must support WORK_ITEM, + * SUB_GROUP and WORK_GROUP. 
+ * (https://github.com/KhronosGroup/SYCL-Docs/pull/382) */ + ur_memory_scope_capability_flags_t URCapabilities = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; + + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); + + assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && + "Violates minimum mandated guarantee"); + + /* Because scopes are hierarchical, wider scopes support all narrower + * scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We + * already initialized to these minimum mandated capabilities. Just check + * wider scopes. */ + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + } + + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. OpenCL 1.x minimum + * mandated capabilities are WORK_GROUP, we already initialized using it. 
+ */ + if (DevVer >= oclv::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + * ALL_DEVICES */ + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_scope_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(URCapabilities)); + } + case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2 */ + ur_memory_order_capability_flags_t URCapabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; + + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); + + assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && + "Violates minimum mandated guarantee"); + assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) && + "Violates minimum mandated guarantee"); + + /* We already initialized to minimum mandated capabilities. Just check + * stronger orders. */ + if (CLCapabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. OpenCL 1.x minimum + * mandated capabilities are RELAXED | ACQ_REL, we already initialized + * using these. 
*/ + if (DevVer >= oclv::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are RELAXED | ACQ_REL | + * SEQ_CST */ + URCapabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_order_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(URCapabilities)); + } + case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + /* Initialize result to minimum mandated capabilities according to + * SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support + * all narrower scopes. At a minimum, each device must support WORK_ITEM, + * SUB_GROUP and WORK_GROUP. + * (https://github.com/KhronosGroup/SYCL-Docs/pull/382) */ + ur_memory_scope_capability_flags_t URCapabilities = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; + + oclv::OpenCLVersion DevVer; + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( + cl_adapter::cast(hDevice), DevVer)); + + cl_device_atomic_capabilities CLCapabilities; + if (DevVer >= oclv::V3_0) { + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); + + assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && + "Violates minimum mandated guarantee"); + + /* Because scopes are hierarchical, wider scopes support all narrower + * scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We + * already initialized to these minimum mandated capabilities. Just check + * wider scopes. 
*/ + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + } + + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } else { + /* This info is only available in OpenCL version >= 3.0. Just return + * minimum mandated capabilities for older versions. OpenCL 1.x minimum + * mandated capabilities are WORK_GROUP, we already initialized using it. + */ + if (DevVer >= oclv::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + * ALL_DEVICES */ + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + } + + /* cl_device_atomic_capabilities is uint64_t and + * ur_memory_scope_capability_flags_t is uint32_t */ + return ReturnValue( + static_cast(URCapabilities)); + } + + case UR_DEVICE_INFO_IMAGE_SRGB: { + return ReturnValue(true); + } + + case UR_DEVICE_INFO_BFLOAT16: { + return ReturnValue(false); + } + case UR_DEVICE_INFO_ATOMIC_64: { + bool Supported = false; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, + Supported)); + + return ReturnValue(Supported); + } + case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { + + cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, nullptr)); + + return ReturnValue(DevType == CL_DEVICE_TYPE_GPU); + } + case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { + bool Supported = false; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_mem_channel_property"}, Supported)); + + return ReturnValue(Supported); + } + case UR_DEVICE_INFO_ESIMD_SUPPORT: { + bool Supported = false; + cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; + CL_RETURN_ON_FAILURE( + 
clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, nullptr)); + + cl_uint VendorID = 0; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), CL_DEVICE_VENDOR_ID, + sizeof(VendorID), &VendorID, nullptr)); + + /* ESIMD is only supported by Intel GPUs. */ + Supported = DevType == CL_DEVICE_TYPE_GPU && VendorID == 0x8086; + + return ReturnValue(Supported); + } + case UR_DEVICE_INFO_QUEUE_PROPERTIES: + case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: + case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: + case UR_DEVICE_INFO_LOCAL_MEM_TYPE: + case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { + /* CL type: cl_bitfield / enum + * UR type: ur_flags_t (uint32_t) */ + + cl_bitfield CLValue; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_bitfield), &CLValue, nullptr)); + + /* We can just static_cast the output because OpenCL and UR bitfields + * map 1 to 1 for these properties. 
cl_bitfield is uint64_t and ur_flags_t + * types are uint32_t */ + return ReturnValue(static_cast(CLValue)); + } + case UR_DEVICE_INFO_IMAGE_SUPPORTED: + case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: + case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: + case UR_DEVICE_INFO_ENDIAN_LITTLE: + case UR_DEVICE_INFO_AVAILABLE: + case UR_DEVICE_INFO_COMPILER_AVAILABLE: + case UR_DEVICE_INFO_LINKER_AVAILABLE: + case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: + case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: + case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: { + /* CL type: cl_bool + * UR type: ur_bool_t */ + + cl_bool CLValue; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_bool), &CLValue, nullptr)); + + /* cl_bool is uint32_t and ur_bool_t is bool */ + return ReturnValue(static_cast(CLValue)); + } + case UR_DEVICE_INFO_VENDOR_ID: + case UR_DEVICE_INFO_MAX_COMPUTE_UNITS: + case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE: + case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE: + case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: + case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: + case UR_DEVICE_INFO_ADDRESS_BITS: + case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: + case UR_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS: + case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS: + case UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: + case UR_DEVICE_INFO_MAX_SAMPLERS: + case 
UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: + case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: + case UR_DEVICE_INFO_REFERENCE_COUNT: + case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: + case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: + case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: + case UR_DEVICE_INFO_GLOBAL_MEM_SIZE: + case UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: + case UR_DEVICE_INFO_LOCAL_MEM_SIZE: + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + case UR_DEVICE_INFO_IMAGE2D_MAX_WIDTH: + case UR_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: + case UR_DEVICE_INFO_IMAGE3D_MAX_WIDTH: + case UR_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: + case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH: + case UR_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: + case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: + case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: + case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: + case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: + case UR_DEVICE_INFO_PLATFORM: + case UR_DEVICE_INFO_PARENT_DEVICE: + case UR_DEVICE_INFO_IL_VERSION: + case UR_DEVICE_INFO_NAME: + case UR_DEVICE_INFO_VENDOR: + case UR_DEVICE_INFO_DRIVER_VERSION: + case UR_DEVICE_INFO_PROFILE: + case UR_DEVICE_INFO_VERSION: + case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: + case UR_DEVICE_INFO_EXTENSIONS: + case UR_DEVICE_INFO_BUILT_IN_KERNELS: + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: + case UR_DEVICE_INFO_IP_VERSION: { + /* We can just use the OpenCL outputs because the sizes of OpenCL types + * are the same as UR. + * | CL | UR | Size | + * | char[] | char[] | 8 | + * | cl_uint | uint32_t | 4 | + * | cl_ulong | uint64_t | 8 | + * | size_t | size_t | 8 | + * | cl_platform_id | ur_platform_handle_t | 8 | + * | ur_device_handle_t | cl_device_id | 8 | + */ + + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; + } + /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU + * EU device-specific information extensions. 
Some of the queries are + * enabled by cl_intel_device_attribute_query extension, but it's not yet in + * the Registry. */ + case UR_DEVICE_INFO_PCI_ADDRESS: + case UR_DEVICE_INFO_GPU_EU_COUNT: + case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: + case UR_DEVICE_INFO_GPU_EU_SLICES: + case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE: + case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: + case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: + case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: + /* TODO: Check if device UUID extension is enabled in OpenCL. For details + * about Intel UUID extension, see + * sycl/doc/extensions/supported/sycl_ext_intel_device_info.md */ + case UR_DEVICE_INFO_UUID: + /* This enums have no equivalent in OpenCL */ + case UR_DEVICE_INFO_DEVICE_ID: + case UR_DEVICE_INFO_GLOBAL_MEM_FREE: + case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: + case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: + case UR_DEVICE_INFO_ASYNC_BARRIER: + case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( + ur_device_handle_t hDevice, + const ur_device_partition_properties_t *pProperties, uint32_t NumDevices, + ur_device_handle_t *phSubDevices, uint32_t *pNumDevicesRet) { + + std::vector CLProperties( + pProperties->PropCount + 2); + + /* The type must be the same for all properties since OpenCL doesn't support + * property lists with multiple types */ + CLProperties[0] = + static_cast(pProperties->pProperties->type); + + for (uint32_t i = 0; i < pProperties->PropCount; ++i) { + cl_device_partition_property CLProperty; + switch (pProperties->pProperties->type) { + case UR_DEVICE_PARTITION_EQUALLY: { + CLProperty = static_cast( + pProperties->pProperties->value.equally); + break; + } + case UR_DEVICE_PARTITION_BY_COUNTS: { + CLProperty = static_cast( + pProperties->pProperties->value.count); + break; + } + case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: { 
+ CLProperty = static_cast( + pProperties->pProperties->value.affinity_domain); + break; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + CLProperties[i + 1] = CLProperty; + } + + /* Terminate the list with 0 */ + CLProperties[CLProperties.size() - 1] = 0; + + cl_uint CLNumDevicesRet; + CL_RETURN_ON_FAILURE( + clCreateSubDevices(cl_adapter::cast(hDevice), + CLProperties.data(), 0, nullptr, &CLNumDevicesRet)); + + if (pNumDevicesRet) { + *pNumDevicesRet = CLNumDevicesRet; + } + + /*If NumDevices is less than the number of sub-devices available, then the + * function shall only retrieve that number of sub-devices. */ + if (phSubDevices) { + std::vector CLSubDevices(CLNumDevicesRet); + CL_RETURN_ON_FAILURE(clCreateSubDevices( + cl_adapter::cast(hDevice), CLProperties.data(), + CLNumDevicesRet, CLSubDevices.data(), nullptr)); + + std::memcpy(phSubDevices, CLSubDevices.data(), + sizeof(cl_device_id) * NumDevices); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { + + cl_int Result = clRetainDevice(cl_adapter::cast(hDevice)); + + return mapCLErrorToUR(Result); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urDeviceRelease(ur_device_handle_t hDevice) { + + cl_int Result = clReleaseDevice(cl_adapter::cast(hDevice)); + + return mapCLErrorToUR(Result); +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( + ur_device_handle_t hDevice, ur_native_handle_t *phNativeDevice) { + + *phNativeDevice = reinterpret_cast(hDevice); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( + ur_native_handle_t hNativeDevice, ur_platform_handle_t, + const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { + + *phDevice = reinterpret_cast(hNativeDevice); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( + ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, + uint64_t 
*pHostTimestamp) { + oclv::OpenCLVersion DevVer, PlatVer; + cl_platform_id Platform; + cl_device_id DeviceId = cl_adapter::cast(hDevice); + + // TODO: Cache OpenCL version for each device and platform + auto RetErr = clGetDeviceInfo(DeviceId, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &Platform, nullptr); + CL_RETURN_ON_FAILURE(RetErr); + + RetErr = cl_adapter::getDeviceVersion(DeviceId, DevVer); + CL_RETURN_ON_FAILURE(RetErr); + + RetErr = cl_adapter::getPlatformVersion(Platform, PlatVer); + + if (PlatVer < oclv::V2_1 || DevVer < oclv::V2_1) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (pDeviceTimestamp) { + uint64_t Dummy; + clGetDeviceAndHostTimer(DeviceId, pDeviceTimestamp, + pHostTimestamp == nullptr ? &Dummy + : pHostTimestamp); + + } else if (pHostTimestamp) { + clGetHostTimer(DeviceId, pHostTimestamp); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( + ur_device_handle_t hDevice, const ur_device_binary_t *pBinaries, + uint32_t NumBinaries, uint32_t *pSelectedBinary) { + // TODO: this is a bare-bones implementation for choosing a device image + // that would be compatible with the targeted device. An AOT-compiled + // image is preferred over SPIR-V for known devices (i.e. Intel devices) + // The implementation makes no effort to differentiate between multiple images + // for the given device, and simply picks the first one compatible + // Real implementation will use the same mechanism OpenCL ICD dispatcher + // uses. Something like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. 
+ + // Choose the binary target for the provided device + const char *ImageTarget = nullptr; + // Get the type of the device + cl_device_type DeviceType; + constexpr uint32_t InvalidInd = std::numeric_limits::max(); + cl_int RetErr = + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DeviceType, nullptr); + if (RetErr != CL_SUCCESS) { + *pSelectedBinary = InvalidInd; + CL_RETURN_ON_FAILURE(RetErr); + } + + switch (DeviceType) { + // TODO: Factor out vendor specifics into a separate source + // E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp? + + // We'll attempt to find an image that was AOT-compiled + // from a SPIR-V image into an image specific for: + + case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64; + break; + case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + break; + case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64_FPGA; + break; + default: + // Otherwise, we'll attempt to find and JIT-compile + // a device-independent SPIR-V image + ImageTarget = UR_DEVICE_BINARY_TARGET_SPIRV64; + break; + } + + // Find the appropriate device image, fallback to spirv if not found + uint32_t Fallback = InvalidInd; + for (uint32_t i = 0; i < NumBinaries; ++i) { + if (strcmp(pBinaries[i].pDeviceTargetSpec, ImageTarget) == 0) { + *pSelectedBinary = i; + return UR_RESULT_SUCCESS; + } + if (strcmp(pBinaries[i].pDeviceTargetSpec, + UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) + Fallback = i; + } + // Points to a spirv image, if such indeed was found + if ((*pSelectedBinary = Fallback) != InvalidInd) + return UR_RESULT_SUCCESS; + // No image can be loaded for the given device + return UR_RESULT_ERROR_INVALID_BINARY; +} diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp new file mode 100644 index 0000000000..548a5012f9 --- /dev/null +++ 
b/source/adapters/opencl/device.hpp @@ -0,0 +1,20 @@ +//===--------- device.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +namespace cl_adapter { +ur_result_t getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version); + +ur_result_t checkDeviceExtensions(cl_device_id Dev, + const std::vector &Exts, + bool &Supported); +} // namespace cl_adapter diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp new file mode 100644 index 0000000000..29c5ad672e --- /dev/null +++ b/source/adapters/opencl/enqueue.cpp @@ -0,0 +1,401 @@ +//===--------- enqueue.cpp - OpenCL Adapter --------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" + +cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { + cl_map_flags CLFlags = 0; + if (URFlags & UR_MAP_FLAG_READ) { + CLFlags |= CL_MAP_READ; + } + if (URFlags & UR_MAP_FLAG_WRITE) { + CLFlags |= CL_MAP_WRITE; + } + if (URFlags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION) { + CLFlags |= CL_MAP_WRITE_INVALIDATE_REGION; + } + + return CLFlags; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( + cl_adapter::cast(hQueue), + cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, + pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( + cl_adapter::cast(hQueue), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( + cl_adapter::cast(hQueue), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT 
ur_result_t UR_APICALL urEnqueueMemBufferRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingRead, + cl_adapter::cast(&bufferOrigin), + cl_adapter::cast(&hostOrigin), + cl_adapter::cast(®ion), bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( + 
ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingWrite, + cl_adapter::cast(&bufferOrigin), + cl_adapter::cast(&hostOrigin), + cl_adapter::cast(®ion), bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBufferSrc), + cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBufferSrc), + cl_adapter::cast(hBufferDst), + cl_adapter::cast(&srcOrigin), + 
cl_adapter::cast(&dstOrigin), + cl_adapter::cast(®ion), srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, + size_t patternSize, size_t offset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueReadImage( + cl_adapter::cast(hQueue), + cl_adapter::cast(hImage), blockingRead, + cl_adapter::cast(&origin), + cl_adapter::cast(®ion), rowPitch, slicePitch, pDst, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueWriteImage( + cl_adapter::cast(hQueue), + cl_adapter::cast(hImage), blockingWrite, + cl_adapter::cast(&origin), + cl_adapter::cast(®ion), rowPitch, slicePitch, pSrc, + 
numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( + ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueCopyImage( + cl_adapter::cast(hQueue), + cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), + cl_adapter::cast(&srcOrigin), + cl_adapter::cast(&dstOrigin), + cl_adapter::cast(®ion), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap) { + + cl_int Err; + *ppRetMap = clEnqueueMapBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), blockingMap, + convertURMapFlagsToCL(mapFlags), offset, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent), &Err); + + CL_RETURN_ON_FAILURE(Err); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( + ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject( + cl_adapter::cast(hQueue), + cl_adapter::cast(hMem), pMappedPtr, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( + 
ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + cl_context Ctx = nullptr; + cl_int Res = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); + + if (Res != CL_SUCCESS) + return mapCLErrorToUR(Res); + + cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; + Res = cl_ext::getExtFuncFromContext( + Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, + cl_ext::EnqueueWriteGlobalVariableName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + Res = F(cl_adapter::cast(hQueue), + cl_adapter::cast(hProgram), name, blockingWrite, count, + offset, pSrc, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent)); + + return mapCLErrorToUR(Res); +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + cl_context Ctx = nullptr; + cl_int Res = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); + + if (Res != CL_SUCCESS) + return mapCLErrorToUR(Res); + + cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; + Res = cl_ext::getExtFuncFromContext( + Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, + cl_ext::EnqueueReadGlobalVariableName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + Res = F(cl_adapter::cast(hQueue), + cl_adapter::cast(hProgram), name, blockingRead, count, + offset, pDst, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent)); + + return 
mapCLErrorToUR(Res); +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, void *pDst, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return mapCLErrorToUR(CLErr); + } + + clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, + cl_ext::EnqueueReadHostPipeName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR( + FuncPtr(cl_adapter::cast(hQueue), + cl_adapter::cast(hProgram), pipe_symbol, blocking, + pDst, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, void *pSrc, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return mapCLErrorToUR(CLErr); + } + + clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, + cl_ext::EnqueueWriteHostPipeName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR( + FuncPtr(cl_adapter::cast(hQueue), + cl_adapter::cast(hProgram), pipe_symbol, blocking, + pSrc, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + 
cl_adapter::cast(phEvent))); + } + + return RetVal; +} diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp new file mode 100644 index 0000000000..78303a0829 --- /dev/null +++ b/source/adapters/opencl/event.cpp @@ -0,0 +1,136 @@ +//===--------- memory.cpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" + +cl_event_info convertUREventInfoToCL(const ur_event_info_t PropName) { + switch (PropName) { + case UR_EVENT_INFO_COMMAND_QUEUE: + return CL_EVENT_COMMAND_QUEUE; + break; + case UR_EVENT_INFO_CONTEXT: + return CL_EVENT_CONTEXT; + break; + case UR_EVENT_INFO_COMMAND_TYPE: + return CL_EVENT_COMMAND_TYPE; + break; + case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: + return CL_EVENT_COMMAND_EXECUTION_STATUS; + break; + case UR_EVENT_INFO_REFERENCE_COUNT: + return CL_EVENT_REFERENCE_COUNT; + break; + default: + return -1; + break; + } +} + +cl_profiling_info +convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { + switch (PropName) { + case UR_PROFILING_INFO_COMMAND_QUEUED: + return CL_PROFILING_COMMAND_QUEUED; + case UR_PROFILING_INFO_COMMAND_SUBMIT: + return CL_PROFILING_COMMAND_SUBMIT; + case UR_PROFILING_INFO_COMMAND_START: + return CL_PROFILING_COMMAND_START; + // TODO(ur) add UR_PROFILING_INFO_COMMAND_COMPLETE once spec has been updated + case UR_PROFILING_INFO_COMMAND_END: + return CL_PROFILING_COMMAND_END; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( + ur_native_handle_t hNativeEvent, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { + *phEvent = 
reinterpret_cast(hNativeEvent); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( + ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { + return getNativeHandle(hEvent, phNativeEvent); +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { + cl_int RetErr = clReleaseEvent(cl_adapter::cast(hEvent)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { + cl_int RetErr = clRetainEvent(cl_adapter::cast(hEvent)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { + cl_int RetErr = clWaitForEvents( + numEvents, cl_adapter::cast(phEventWaitList)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, + ur_event_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + cl_event_info CLEventInfo = convertUREventInfoToCL(propName); + cl_int RetErr = + clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, + pPropValue, pPropSizeRet); + CL_RETURN_ON_FAILURE(RetErr); + + if (RetErr == CL_SUCCESS && + propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, change + * it to CL_SUBMITTED. sycl::info::event::event_command_status has no + * equivalent to CL_QUEUED. + * + * FIXME UR Port: This should not be part of the UR adapter. Since PI_QUEUED + * exists, SYCL RT should be changed to handle this situation. In addition, + * SYCL RT is relying on PI_QUEUED status to make sure that the queues are + * flushed. 
*/ + const auto param_value_int = static_cast(pPropValue); + if (*param_value_int == UR_EVENT_STATUS_QUEUED) { + *param_value_int = UR_EVENT_STATUS_SUBMITTED; + } + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( + ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + cl_profiling_info CLProfilingInfo = convertURProfilingInfoToCL(propName); + cl_int RetErr = clGetEventProfilingInfo(cl_adapter::cast(hEvent), + CLProfilingInfo, propSize, pPropValue, + pPropSizeRet); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData) { + std::ignore = hEvent; + std::ignore = execStatus; + std::ignore = pfnNotify; + std::ignore = pUserData; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/image.cpp b/source/adapters/opencl/image.cpp new file mode 100644 index 0000000000..f7cef40194 --- /dev/null +++ b/source/adapters/opencl/image.cpp @@ -0,0 +1,176 @@ +//===---------- image.cpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
/// Stub: pitched USM allocation (part of the bindless-images experimental
/// feature) is not implemented by the OpenCL adapter; every call reports the
/// feature as unsupported.
UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
    [[maybe_unused]] ur_context_handle_t hContext,
    [[maybe_unused]] ur_device_handle_t hDevice,
    [[maybe_unused]] const ur_usm_desc_t *pUSMDesc,
    [[maybe_unused]] ur_usm_pool_handle_t pool,
    [[maybe_unused]] size_t widthInBytes, [[maybe_unused]] size_t height,
    [[maybe_unused]] size_t elementSizeBytes, [[maybe_unused]] void **ppMem,
    [[maybe_unused]] size_t *pResultPitch) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
/// Stub: copying to/from bindless images is not implemented by the OpenCL
/// adapter; every call reports the feature as unsupported.
UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
    [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pDst,
    [[maybe_unused]] void *pSrc,
    [[maybe_unused]] const ur_image_format_t *pImageFormat,
    [[maybe_unused]] const ur_image_desc_t *pImageDesc,
    [[maybe_unused]] ur_exp_image_copy_flags_t imageCopyFlags,
    [[maybe_unused]] ur_rect_offset_t srcOffset,
    [[maybe_unused]] ur_rect_offset_t dstOffset,
    [[maybe_unused]] ur_rect_region_t copyExtent,
    [[maybe_unused]] ur_rect_region_t hostExtent,
    [[maybe_unused]] uint32_t numEventsInWaitList,
    [[maybe_unused]] const ur_event_handle_t *phEventWaitList,
    [[maybe_unused]] ur_event_handle_t *phEvent) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
/// Stub: querying individual mipmap levels of a bindless image is not
/// implemented by the OpenCL adapter; every call reports the feature as
/// unsupported.
UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp(
    [[maybe_unused]] ur_context_handle_t hContext,
    [[maybe_unused]] ur_device_handle_t hDevice,
    [[maybe_unused]] ur_exp_image_mem_handle_t hImageMem,
    [[maybe_unused]] uint32_t mipmapLevel,
    [[maybe_unused]] ur_exp_image_mem_handle_t *phImageMem) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
/// Stub: waiting on an imported external (interop) semaphore is not
/// implemented by the OpenCL adapter; every call reports the feature as
/// unsupported.
UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp(
    [[maybe_unused]] ur_queue_handle_t hQueue,
    [[maybe_unused]] ur_exp_interop_semaphore_handle_t hSemaphore,
    [[maybe_unused]] uint32_t numEventsInWaitList,
    [[maybe_unused]] const ur_event_handle_t *phEventWaitList,
    [[maybe_unused]] ur_event_handle_t *phEvent) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, + ur_kernel_handle_t *phKernel) { + + cl_int CLResult; + *phKernel = cl_adapter::cast(clCreateKernel( + cl_adapter::cast(hProgram), pKernelName, &CLResult)); + CL_RETURN_ON_FAILURE(CLResult); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( + ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, + const ur_kernel_arg_value_properties_t *, const void *pArgValue) { + + CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + argSize, pArgValue)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, const ur_kernel_arg_local_properties_t *) { + + CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + argSize, nullptr)); + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURKernelInfoToCL(ur_kernel_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_KERNEL_INFO_FUNCTION_NAME: + return CL_KERNEL_FUNCTION_NAME; + case UR_KERNEL_INFO_NUM_ARGS: + return CL_KERNEL_NUM_ARGS; + case UR_KERNEL_INFO_REFERENCE_COUNT: + return CL_KERNEL_REFERENCE_COUNT; + case UR_KERNEL_INFO_CONTEXT: + return CL_KERNEL_CONTEXT; + case UR_KERNEL_INFO_PROGRAM: + return CL_KERNEL_PROGRAM; + case UR_KERNEL_INFO_ATTRIBUTES: + return CL_KERNEL_ATTRIBUTES; + case UR_KERNEL_INFO_NUM_REGS: + return CL_KERNEL_NUM_ARGS; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + 
CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + mapURKernelInfoToCL(propName), propSize, + pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURKernelGroupInfoToCL(ur_kernel_group_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: + return CL_KERNEL_GLOBAL_WORK_SIZE; + case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: + return CL_KERNEL_WORK_GROUP_SIZE; + case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: + return CL_KERNEL_COMPILE_WORK_GROUP_SIZE; + case UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: + return CL_KERNEL_LOCAL_MEM_SIZE; + case UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: + return CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; + case UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: + return CL_KERNEL_PRIVATE_MEM_SIZE; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( + cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +static cl_int +mapURKernelSubGroupInfoToCL(ur_kernel_sub_group_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE: + return CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE; + case UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS: + return CL_KERNEL_MAX_NUM_SUB_GROUPS; + case UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS: + return CL_KERNEL_COMPILE_NUM_SUB_GROUPS; + case UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL: + return CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t 
propName, size_t, + void *pPropValue, size_t *pPropSizeRet) { + + std::shared_ptr InputValue; + size_t InputValueSize = 0; + size_t RetVal; + + if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { + // OpenCL needs an input value for PI_KERNEL_MAX_SUB_GROUP_SIZE so if no + // value is given we use the max work item size of the device in the first + // dimension to avoid truncation of max sub-group size. + uint32_t MaxDims = 0; + ur_result_t URRet = + urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS, + sizeof(uint32_t), &MaxDims, nullptr); + if (URRet != UR_RESULT_SUCCESS) + return URRet; + std::shared_ptr WgSizes{new size_t[MaxDims]}; + URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, + MaxDims * sizeof(size_t), WgSizes.get(), nullptr); + if (URRet != UR_RESULT_SUCCESS) + return URRet; + for (size_t i = 1; i < MaxDims; ++i) + WgSizes.get()[i] = 1; + InputValue = std::move(WgSizes); + InputValueSize = MaxDims * sizeof(size_t); + } + + cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), + cl_adapter::cast(hDevice), + mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), + sizeof(size_t), &RetVal, pPropSizeRet); + + if (Ret == CL_INVALID_OPERATION) { + // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does + // not support subgroups. + if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS) { + RetVal = 1; // Minimum required by SYCL 2020 spec + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS) { + RetVal = 0; // Not specified by kernel + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE) { + // Return the maximum sub group size for the device + size_t ResultSize = 0; + // Two calls to urDeviceGetInfo are needed: the first determines the size + // required to store the result, and the second returns the actual size + // values. 
+ ur_result_t URRet = + urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, 0, + nullptr, &ResultSize); + if (URRet != UR_RESULT_SUCCESS) { + return URRet; + } + assert(ResultSize % sizeof(size_t) == 0); + std::vector Result(ResultSize / sizeof(size_t)); + URRet = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, + ResultSize, Result.data(), nullptr); + if (URRet != UR_RESULT_SUCCESS) { + return URRet; + } + RetVal = *std::max_element(Result.begin(), Result.end()); + Ret = CL_SUCCESS; + } else if (propName == UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL) { + RetVal = 0; // Not specified by kernel + Ret = CL_SUCCESS; + } + } + + *(static_cast(pPropValue)) = static_cast(RetVal); + if (pPropSizeRet) + *pPropSizeRet = sizeof(uint32_t); + + CL_RETURN_ON_FAILURE(Ret); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { + CL_RETURN_ON_FAILURE(clRetainKernel(cl_adapter::cast(hKernel))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urKernelRelease(ur_kernel_handle_t hKernel) { + CL_RETURN_ON_FAILURE(clReleaseKernel(cl_adapter::cast(hKernel))); + return UR_RESULT_SUCCESS; +} + +/** + * Enables indirect access of pointers in kernels. Necessary to avoid telling CL + * about every pointer that might be used. + */ +static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { + + cl_bool TrueVal = CL_TRUE; + clHostMemAllocINTEL_fn HFunc = nullptr; + clSharedMemAllocINTEL_fn SFunc = nullptr; + clDeviceMemAllocINTEL_fn DFunc = nullptr; + cl_context CLContext; + + /* We test that each alloc type is supported before we actually try to set + * KernelExecInfo. 
*/ + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_KERNEL_CONTEXT, sizeof(cl_context), + &CLContext, nullptr)); + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::HostMemAllocName, &HFunc)); + + if (HFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + cl_ext::DeviceMemAllocName, &DFunc)); + + if (DFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + cl_ext::SharedMemAllocName, &SFunc)); + + if (SFunc) { + CL_RETURN_ON_FAILURE( + clSetKernelExecInfo(cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( + ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize, + const ur_kernel_exec_info_properties_t *, const void *pPropValue) { + + switch (propName) { + case UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS: { + if (*(static_cast(pPropValue)) == true) { + CL_RETURN_ON_FAILURE(usmSetIndirectAccess(hKernel)); + } + return UR_RESULT_SUCCESS; + } + case UR_KERNEL_EXEC_INFO_CACHE_CONFIG: { + /* Setting the cache config is unsupported in OpenCL */ + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + case UR_KERNEL_EXEC_INFO_USM_PTRS: { + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + cl_adapter::cast(hKernel), propName, propSize, pPropValue)); + return UR_RESULT_SUCCESS; + } + default: { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } +} + 
+UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( + ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { + + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_KERNEL_CONTEXT, sizeof(cl_context), + &CLContext, nullptr)); + + clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, + cl_ext::ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, + cl_ext::SetKernelArgMemPointerName, &FuncPtr)); + + if (FuncPtr) { + /* OpenCL passes pointers by value not by reference. This means we need to + * deref the arg to get the pointer value */ + auto PtrToPtr = reinterpret_cast(pArgValue); + auto DerefPtr = reinterpret_cast(*PtrToPtr); + CL_RETURN_ON_FAILURE(FuncPtr(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + DerefPtr)); + } + + return UR_RESULT_SUCCESS; +} +UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( + ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { + + *phNativeKernel = reinterpret_cast(hKernel); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( + ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, + const ur_kernel_native_properties_t *, ur_kernel_handle_t *phKernel) { + + *phKernel = reinterpret_cast(hNativeKernel); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( + ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { + + cl_int RetErr = clSetKernelArg( + cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), + sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( + ur_kernel_handle_t hKernel, uint32_t argIndex, + const 
ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { + + cl_int RetErr = clSetKernelArg( + cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), + sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp new file mode 100644 index 0000000000..279faad376 --- /dev/null +++ b/source/adapters/opencl/memory.cpp @@ -0,0 +1,391 @@ +//===--------- memory.cpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" + +cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { + cl_image_format CLImageFormat; + switch (PImageFormat->channelOrder) { + case UR_IMAGE_CHANNEL_ORDER_A: + CLImageFormat.image_channel_order = CL_A; + break; + case UR_IMAGE_CHANNEL_ORDER_R: + CLImageFormat.image_channel_order = CL_R; + break; + case UR_IMAGE_CHANNEL_ORDER_RG: + CLImageFormat.image_channel_order = CL_RG; + break; + case UR_IMAGE_CHANNEL_ORDER_RA: + CLImageFormat.image_channel_order = CL_RA; + break; + case UR_IMAGE_CHANNEL_ORDER_RGB: + CLImageFormat.image_channel_order = CL_RGB; + break; + case UR_IMAGE_CHANNEL_ORDER_RGBA: + CLImageFormat.image_channel_order = CL_RGBA; + break; + case UR_IMAGE_CHANNEL_ORDER_BGRA: + CLImageFormat.image_channel_order = CL_BGRA; + break; + case UR_IMAGE_CHANNEL_ORDER_ARGB: + CLImageFormat.image_channel_order = CL_ARGB; + break; + case UR_IMAGE_CHANNEL_ORDER_ABGR: + CLImageFormat.image_channel_order = CL_ABGR; + break; + case UR_IMAGE_CHANNEL_ORDER_INTENSITY: + CLImageFormat.image_channel_order = CL_INTENSITY; + break; + case UR_IMAGE_CHANNEL_ORDER_LUMINANCE: + 
CLImageFormat.image_channel_order = CL_LUMINANCE; + break; + case UR_IMAGE_CHANNEL_ORDER_RX: + CLImageFormat.image_channel_order = CL_Rx; + break; + case UR_IMAGE_CHANNEL_ORDER_RGX: + CLImageFormat.image_channel_order = CL_RGx; + break; + case UR_IMAGE_CHANNEL_ORDER_RGBX: + CLImageFormat.image_channel_order = CL_RGBx; + break; + case UR_IMAGE_CHANNEL_ORDER_SRGBA: + CLImageFormat.image_channel_order = CL_sRGBA; + break; + default: + CLImageFormat.image_channel_order = -1; + break; + } + + switch (PImageFormat->channelType) { + case UR_IMAGE_CHANNEL_TYPE_SNORM_INT8: + CLImageFormat.image_channel_data_type = CL_SNORM_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_SNORM_INT16: + CLImageFormat.image_channel_data_type = CL_SNORM_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT8: + CLImageFormat.image_channel_data_type = CL_UNORM_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_INT16: + CLImageFormat.image_channel_data_type = CL_UNORM_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: + CLImageFormat.image_channel_data_type = CL_UNORM_SHORT_565; + break; + case UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: + CLImageFormat.image_channel_data_type = CL_UNORM_SHORT_555; + break; + case UR_IMAGE_CHANNEL_TYPE_INT_101010: + CLImageFormat.image_channel_data_type = CL_UNORM_INT_101010; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8: + CLImageFormat.image_channel_data_type = CL_SIGNED_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16: + CLImageFormat.image_channel_data_type = CL_SIGNED_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32: + CLImageFormat.image_channel_data_type = CL_SIGNED_INT32; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT8; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT16; + break; + case UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: + CLImageFormat.image_channel_data_type = CL_UNSIGNED_INT32; + break; + case 
UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT: + CLImageFormat.image_channel_data_type = CL_HALF_FLOAT; + break; + case UR_IMAGE_CHANNEL_TYPE_FLOAT: + CLImageFormat.image_channel_data_type = CL_FLOAT; + break; + default: + CLImageFormat.image_channel_data_type = -1; + break; + } + + return CLImageFormat; +} + +cl_image_desc mapURImageDescToCL(const ur_image_desc_t *PImageDesc) { + cl_image_desc CLImageDesc; + CLImageDesc.image_type = + cl_adapter::cast(PImageDesc->type); + + switch (PImageDesc->type) { + case UR_MEM_TYPE_BUFFER: + CLImageDesc.image_type = CL_MEM_OBJECT_BUFFER; + break; + case UR_MEM_TYPE_IMAGE2D: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + break; + case UR_MEM_TYPE_IMAGE3D: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE3D; + break; + case UR_MEM_TYPE_IMAGE2D_ARRAY: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case UR_MEM_TYPE_IMAGE1D: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D; + break; + case UR_MEM_TYPE_IMAGE1D_ARRAY: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + break; + case UR_MEM_TYPE_IMAGE1D_BUFFER: + CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + break; + default: + CLImageDesc.image_type = -1; + break; + } + + CLImageDesc.image_width = PImageDesc->width; + CLImageDesc.image_height = PImageDesc->height; + CLImageDesc.image_depth = PImageDesc->depth; + CLImageDesc.image_array_size = PImageDesc->arraySize; + CLImageDesc.image_row_pitch = PImageDesc->rowPitch; + CLImageDesc.image_slice_pitch = PImageDesc->slicePitch; + CLImageDesc.num_mip_levels = PImageDesc->numMipLevel; + CLImageDesc.num_samples = PImageDesc->numSamples; + CLImageDesc.buffer = nullptr; + CLImageDesc.mem_object = nullptr; + + return CLImageDesc; +} + +cl_int mapURMemImageInfoToCL(ur_image_info_t URPropName) { + + switch (URPropName) { + case UR_IMAGE_INFO_FORMAT: + return CL_IMAGE_FORMAT; + case UR_IMAGE_INFO_ELEMENT_SIZE: + return CL_IMAGE_ELEMENT_SIZE; + case UR_IMAGE_INFO_ROW_PITCH: + return CL_IMAGE_ROW_PITCH; + case 
UR_IMAGE_INFO_SLICE_PITCH: + return CL_IMAGE_SLICE_PITCH; + case UR_IMAGE_INFO_WIDTH: + return CL_IMAGE_WIDTH; + case UR_IMAGE_INFO_HEIGHT: + return CL_IMAGE_HEIGHT; + case UR_IMAGE_INFO_DEPTH: + return CL_IMAGE_DEPTH; + default: + return -1; + } +} + +cl_int mapURMemInfoToCL(ur_mem_info_t URPropName) { + + switch (URPropName) { + case UR_MEM_INFO_SIZE: + return CL_MEM_SIZE; + case UR_MEM_INFO_CONTEXT: + return CL_MEM_CONTEXT; + default: + return -1; + } +} + +cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { + cl_map_flags CLFlags = 0; + if (URFlags & UR_MEM_FLAG_READ_WRITE) { + CLFlags |= CL_MEM_READ_WRITE; + } + if (URFlags & UR_MEM_FLAG_WRITE_ONLY) { + CLFlags |= CL_MEM_WRITE_ONLY; + } + if (URFlags & UR_MEM_FLAG_READ_ONLY) { + CLFlags |= CL_MEM_READ_ONLY; + } + if (URFlags & UR_MEM_FLAG_USE_HOST_POINTER) { + CLFlags |= CL_MEM_USE_HOST_PTR; + } + if (URFlags & UR_MEM_FLAG_ALLOC_HOST_POINTER) { + CLFlags |= CL_MEM_ALLOC_HOST_PTR; + } + if (URFlags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) { + CLFlags |= CL_MEM_COPY_HOST_PTR; + } + + return CLFlags; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( + ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { + + cl_int RetErr = CL_INVALID_OPERATION; + if (pProperties) { + // TODO: need to check if all properties are supported by OpenCL RT and + // ignore unsupported + clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + // First we need to look up the function pointer + RetErr = + cl_ext::getExtFuncFromContext( + CLContext, + cl_ext::ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, + cl_ext::CreateBufferWithPropertiesName, &FuncPtr); + if (FuncPtr) { + std::vector PropertiesIntel; + auto Prop = static_cast(pProperties->pNext); + while (Prop) { + switch (Prop->stype) { + case UR_STRUCTURE_TYPE_BUFFER_CHANNEL_PROPERTIES: { + auto BufferChannelProperty = 
+ reinterpret_cast(Prop); + PropertiesIntel.push_back(CL_MEM_CHANNEL_INTEL); + PropertiesIntel.push_back(BufferChannelProperty->channel); + } break; + case UR_STRUCTURE_TYPE_BUFFER_ALLOC_LOCATION_PROPERTIES: { + auto BufferLocationProperty = + reinterpret_cast(Prop); + PropertiesIntel.push_back(CL_MEM_ALLOC_FLAGS_INTEL); + PropertiesIntel.push_back(BufferLocationProperty->location); + } break; + default: + break; + } + Prop = static_cast(Prop->pNext); + } + PropertiesIntel.push_back(0); + + *phBuffer = reinterpret_cast(FuncPtr( + CLContext, PropertiesIntel.data(), static_cast(flags), + size, pProperties->pHost, cl_adapter::cast(&RetErr))); + CL_RETURN_ON_FAILURE(RetErr); + } + } + + *phBuffer = reinterpret_cast(clCreateBuffer( + cl_adapter::cast(hContext), static_cast(flags), + size, pProperties->pHost, cl_adapter::cast(&RetErr))); + CL_RETURN_ON_FAILURE(RetErr); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( + ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + void *pHost, ur_mem_handle_t *phMem) { + + cl_int RetErr = CL_INVALID_OPERATION; + + cl_image_format ImageFormat = mapURImageFormatToCL(pImageFormat); + cl_image_desc ImageDesc = mapURImageDescToCL(pImageDesc); + cl_map_flags MapFlags = convertURMemFlagsToCL(flags); + + *phMem = reinterpret_cast(clCreateImage( + cl_adapter::cast(hContext), MapFlags, &ImageFormat, + &ImageDesc, pHost, cl_adapter::cast(&RetErr))); + CL_RETURN_ON_FAILURE(RetErr); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( + ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem) { + + cl_int RetErr = CL_INVALID_OPERATION; + + cl_buffer_create_type BufferCreateType; + switch (bufferCreateType) { + case UR_BUFFER_CREATE_TYPE_REGION: + BufferCreateType = CL_BUFFER_CREATE_TYPE_REGION; 
+ break; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + _cl_buffer_region BufferRegion; + BufferRegion.origin = pRegion->origin; + BufferRegion.size = pRegion->size; + + *phMem = reinterpret_cast(clCreateSubBuffer( + cl_adapter::cast(hBuffer), static_cast(flags), + BufferCreateType, &BufferRegion, cl_adapter::cast(&RetErr))); + CL_RETURN_ON_FAILURE(RetErr); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { + return getNativeHandle(hMem, phNativeMem); +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( + ur_native_handle_t hNativeMem, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] const ur_mem_native_properties_t *pProperties, + ur_mem_handle_t *phMem) { + + *phMem = reinterpret_cast(hNativeMem); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( + ur_native_handle_t hNativeMem, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] const ur_image_format_t *pImageFormat, + [[maybe_unused]] const ur_image_desc_t *pImageDesc, + [[maybe_unused]] const ur_mem_native_properties_t *pProperties, + ur_mem_handle_t *phMem) { + + *phMem = reinterpret_cast(hNativeMem); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, + ur_mem_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + const cl_int CLPropName = mapURMemInfoToCL(propName); + + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hMemory), + CLPropName, propSize, pPropValue, + pPropSizeRet)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, + ur_image_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, 
pPropValue, pPropSizeRet); + const cl_int CLPropName = mapURMemImageInfoToCL(propName); + + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(hMemory), + CLPropName, propSize, pPropValue, + pPropSizeRet)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { + CL_RETURN_ON_FAILURE(clRetainMemObject(cl_adapter::cast(hMem))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { + CL_RETURN_ON_FAILURE(clReleaseMemObject(cl_adapter::cast(hMem))); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp new file mode 100644 index 0000000000..7188a3e8f0 --- /dev/null +++ b/source/adapters/opencl/platform.cpp @@ -0,0 +1,143 @@ +//===--------- platform.cpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.hpp" + +ur_result_t cl_adapter::getPlatformVersion(cl_platform_id Plat, + oclv::OpenCLVersion &Version) { + + size_t PlatVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); + + std::string PlatVer(PlatVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, PlatVerSize, + PlatVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(PlatVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_PLATFORM; + } + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { + + switch (URPropName) { + case UR_PLATFORM_INFO_NAME: + return CL_PLATFORM_NAME; + case UR_PLATFORM_INFO_VENDOR_NAME: + return CL_PLATFORM_VENDOR; + case UR_PLATFORM_INFO_VERSION: + return CL_PLATFORM_VERSION; + case UR_PLATFORM_INFO_EXTENSIONS: + return CL_PLATFORM_EXTENSIONS; + case UR_PLATFORM_INFO_PROFILE: + return CL_PLATFORM_PROFILE; + default: + return -1; + } +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, + size_t propSize, void *pPropValue, size_t *pSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pSizeRet); + const cl_int CLPropName = mapURPlatformInfoToCL(propName); + + switch (static_cast(propName)) { + case UR_PLATFORM_INFO_BACKEND: + return ReturnValue(UR_PLATFORM_BACKEND_OPENCL); + case UR_PLATFORM_INFO_NAME: + case UR_PLATFORM_INFO_VENDOR_NAME: + case UR_PLATFORM_INFO_VERSION: + case UR_PLATFORM_INFO_EXTENSIONS: + case UR_PLATFORM_INFO_PROFILE: { + CL_RETURN_ON_FAILURE( + clGetPlatformInfo(cl_adapter::cast(hPlatform), + CLPropName, propSize, pPropValue, pSizeRet)); + return UR_RESULT_SUCCESS; + } + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } +} + +UR_DLLEXPORT 
ur_result_t UR_APICALL +urPlatformGetApiVersion([[maybe_unused]] ur_platform_handle_t hPlatform, + ur_api_version_t *pVersion) { + *pVersion = UR_API_VERSION_CURRENT; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { + + cl_int Result = + clGetPlatformIDs(cl_adapter::cast(NumEntries), + cl_adapter::cast(phPlatforms), + cl_adapter::cast(pNumPlatforms)); + + /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ + if (Result == CL_PLATFORM_NOT_FOUND_KHR) { + Result = CL_SUCCESS; + if (pNumPlatforms) { + *pNumPlatforms = 0; + } + } + + return mapCLErrorToUR(Result); +} + +UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( + ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { + *phNativePlatform = reinterpret_cast(hPlatform); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( + ur_native_handle_t hNativePlatform, const ur_platform_native_properties_t *, + ur_platform_handle_t *phPlatform) { + *phPlatform = reinterpret_cast(hNativePlatform); + return UR_RESULT_SUCCESS; +} + +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-cl-opt-disable' for pFrontendOption = -O0 and '' for others. +UR_APIEXPORT ur_result_t UR_APICALL +urPlatformGetBackendOption(ur_platform_handle_t, const char *pFrontendOption, + const char **ppPlatformOption) { + using namespace std::literals; + if (pFrontendOption == nullptr) + return UR_RESULT_SUCCESS; + if (pFrontendOption == ""sv) { + *ppPlatformOption = ""; + return UR_RESULT_SUCCESS; + } + // Return '-cl-opt-disable' for frontend_option = -O0 and '' for others. 
+ if (!strcmp(pFrontendOption, "-O0")) { + *ppPlatformOption = "-cl-opt-disable"; + return UR_RESULT_SUCCESS; + } + if (pFrontendOption == "-O1"sv || pFrontendOption == "-O2"sv || + pFrontendOption == "-O3"sv) { + *ppPlatformOption = ""; + return UR_RESULT_SUCCESS; + } + if (pFrontendOption == "-ftarget-compile-fast"sv) { + *ppPlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; +} diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp new file mode 100644 index 0000000000..31fd69de14 --- /dev/null +++ b/source/adapters/opencl/platform.hpp @@ -0,0 +1,17 @@ +//===--------- platform.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +namespace cl_adapter { +ur_result_t getPlatformVersion(cl_platform_id Plat, + oclv::OpenCLVersion &Version); +} diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp new file mode 100644 index 0000000000..0beca23dab --- /dev/null +++ b/source/adapters/opencl/program.cpp @@ -0,0 +1,421 @@ +//===--------- program.cpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" +#include "context.hpp" +#include "device.hpp" +#include "platform.hpp" + +static ur_result_t getDevicesFromProgram( + ur_program_handle_t hProgram, + std::unique_ptr> &DevicesInProgram) { + + cl_uint DeviceCount; + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), + &DeviceCount, nullptr)); + + if (DeviceCount < 1) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + DevicesInProgram = std::make_unique>(DeviceCount); + + CL_RETURN_ON_FAILURE(clGetProgramInfo( + cl_adapter::cast(hProgram), CL_PROGRAM_DEVICES, + DeviceCount * sizeof(cl_device_id), (*DevicesInProgram).data(), nullptr)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( + ur_context_handle_t hContext, const void *pIL, size_t length, + const ur_program_properties_t *, ur_program_handle_t *phProgram) { + + std::unique_ptr> DevicesInCtx; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::getDevicesFromContext(hContext, DevicesInCtx), phProgram); + + cl_platform_id CurPlatform; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &CurPlatform, nullptr), + phProgram); + + oclv::OpenCLVersion PlatVer; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::getPlatformVersion(CurPlatform, PlatVer), phProgram); + + cl_int Err = CL_SUCCESS; + if (PlatVer >= oclv::V2_1) { + + /* Make sure all devices support CL 2.1 or newer as well. */ + for (cl_device_id Dev : *DevicesInCtx) { + oclv::OpenCLVersion DevVer; + + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::getDeviceVersion(Dev, DevVer), phProgram); + + /* If the device does not support CL 2.1 or greater, we need to make sure + * it supports the cl_khr_il_program extension. 
+ */ + if (DevVer < oclv::V2_1) { + bool Supported = false; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, + Supported), + phProgram); + + if (!Supported) { + return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; + } + } + } + + *phProgram = cl_adapter::cast(clCreateProgramWithIL( + cl_adapter::cast(hContext), pIL, length, &Err)); + CL_RETURN_ON_FAILURE(Err); + } else { + + /* If none of the devices conform with CL 2.1 or newer make sure they all + * support the cl_khr_il_program extension. + */ + for (cl_device_id Dev : *DevicesInCtx) { + bool Supported = false; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, + Supported), + phProgram); + + if (!Supported) { + return UR_RESULT_ERROR_COMPILER_NOT_AVAILABLE; + } + } + + using ApiFuncT = + cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); + ApiFuncT FuncPtr = + reinterpret_cast(clGetExtensionFunctionAddressForPlatform( + CurPlatform, "clCreateProgramWithILKHR")); + + assert(FuncPtr != nullptr); + + *phProgram = cl_adapter::cast( + FuncPtr(cl_adapter::cast(hContext), pIL, length, &Err)); + CL_RETURN_ON_FAILURE(Err); + } + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + const uint8_t *pBinary, const ur_program_properties_t *, + ur_program_handle_t *phProgram) { + + cl_int BinaryStatus; + cl_int CLResult; + *phProgram = cl_adapter::cast(clCreateProgramWithBinary( + cl_adapter::cast(hContext), cl_adapter::cast(1u), + cl_adapter::cast(&hDevice), &size, &pBinary, + &BinaryStatus, &CLResult)); + CL_RETURN_ON_FAILURE(BinaryStatus); + CL_RETURN_ON_FAILURE(CLResult); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions) { + + std::unique_ptr> 
DevicesInProgram; + CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + + CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), + DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, 0, + nullptr, nullptr, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_PROGRAM_INFO_REFERENCE_COUNT: + return CL_PROGRAM_REFERENCE_COUNT; + case UR_PROGRAM_INFO_CONTEXT: + return CL_PROGRAM_CONTEXT; + case UR_PROGRAM_INFO_NUM_DEVICES: + return CL_PROGRAM_NUM_DEVICES; + case UR_PROGRAM_INFO_DEVICES: + return CL_PROGRAM_DEVICES; + case UR_PROGRAM_INFO_SOURCE: + return CL_PROGRAM_SOURCE; + case UR_PROGRAM_INFO_BINARY_SIZES: + return CL_PROGRAM_BINARY_SIZES; + case UR_PROGRAM_INFO_BINARIES: + return CL_PROGRAM_BINARIES; + case UR_PROGRAM_INFO_NUM_KERNELS: + return CL_PROGRAM_NUM_KERNELS; + case UR_PROGRAM_INFO_KERNEL_NAMES: + return CL_PROGRAM_KERNEL_NAMES; + default: + return -1; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + mapURProgramInfoToCL(propName), + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions) { + + std::unique_ptr> DevicesInProgram; + CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + + CL_RETURN_ON_FAILURE(clBuildProgram( + cl_adapter::cast(hProgram), DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, nullptr, nullptr)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramLink(ur_context_handle_t hContext, uint32_t count, + const ur_program_handle_t *phPrograms, 
const char *pOptions, + ur_program_handle_t *phProgram) { + + cl_int CLResult; + *phProgram = cl_adapter::cast( + clLinkProgram(cl_adapter::cast(hContext), 0, nullptr, + pOptions, cl_adapter::cast(count), + cl_adapter::cast(phPrograms), nullptr, + nullptr, &CLResult)); + CL_RETURN_ON_FAILURE(CLResult); + + return UR_RESULT_SUCCESS; +} + +static cl_int mapURProgramBuildInfoToCL(ur_program_build_info_t URPropName) { + + switch (static_cast(URPropName)) { + case UR_PROGRAM_BUILD_INFO_STATUS: + return CL_PROGRAM_BUILD_STATUS; + case UR_PROGRAM_BUILD_INFO_OPTIONS: + return CL_PROGRAM_BUILD_OPTIONS; + case UR_PROGRAM_BUILD_INFO_LOG: + return CL_PROGRAM_BUILD_LOG; + case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: + return CL_PROGRAM_BINARY_TYPE; + default: + return -1; + } +} + +static ur_program_binary_type_t +mapCLBinaryTypeToUR(cl_program_binary_type binaryType) { + switch (binaryType) { + case CL_PROGRAM_BINARY_TYPE_NONE: + return UR_PROGRAM_BINARY_TYPE_NONE; + case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: + return UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; + case CL_PROGRAM_BINARY_TYPE_LIBRARY: + return UR_PROGRAM_BINARY_TYPE_LIBRARY; + case CL_PROGRAM_BINARY_TYPE_EXECUTABLE: + return UR_PROGRAM_BINARY_TYPE_EXECUTABLE; + default: + return UR_PROGRAM_BINARY_TYPE_FORCE_UINT32; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, + ur_program_build_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: + cl_program_binary_type cl_value; + CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( + cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), + &cl_value, nullptr)); + return ReturnValue(mapCLBinaryTypeToUR(cl_value)); + case UR_PROGRAM_BUILD_INFO_LOG: + case UR_PROGRAM_BUILD_INFO_OPTIONS: 
+ case UR_PROGRAM_BUILD_INFO_STATUS: + CL_RETURN_ON_FAILURE( + clGetProgramBuildInfo(cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), propSize, + pPropValue, pPropSizeRet)); + return UR_RESULT_SUCCESS; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramRetain(ur_program_handle_t hProgram) { + + CL_RETURN_ON_FAILURE(clRetainProgram(cl_adapter::cast(hProgram))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urProgramRelease(ur_program_handle_t hProgram) { + + CL_RETURN_ON_FAILURE( + clReleaseProgram(cl_adapter::cast(hProgram))); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( + ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { + + *phNativeProgram = reinterpret_cast(hProgram); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( + ur_native_handle_t hNativeProgram, ur_context_handle_t, + const ur_program_native_properties_t *, ur_program_handle_t *phProgram) { + + *phProgram = reinterpret_cast(hNativeProgram); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( + ur_program_handle_t hProgram, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants) { + + cl_program CLProg = cl_adapter::cast(hProgram); + cl_context Ctx = nullptr; + size_t RetSize = 0; + + CL_RETURN_ON_FAILURE(clGetProgramInfo(CLProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), + &Ctx, &RetSize)); + + cl_ext::clSetProgramSpecializationConstant_fn F = nullptr; + const ur_result_t URResult = cl_ext::getExtFuncFromContext( + Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, + cl_ext::SetProgramSpecializationConstantName, &F); + + if (URResult != UR_RESULT_SUCCESS) { + return URResult; + } + + for (uint32_t i = 0; i < count; ++i) { + CL_RETURN_ON_FAILURE(F(CLProg, 
+ pSpecConstants[i].id, pSpecConstants[i].size, + pSpecConstants[i].pValue)); + } + + return UR_RESULT_SUCCESS; +} + +// Function gets characters between delimiters in str +// then checks if they are equal to the sub_str. +// returns true if there is at least one instance +// returns false if there are no instances of the name +static bool isInSeparatedString(const std::string &Str, char Delimiter, + const std::string &SubStr) { + size_t Beg = 0; + size_t Length = 0; + for (const auto &x : Str) { + if (x == Delimiter) { + if (Str.substr(Beg, Length) == SubStr) + return true; + + Beg += Length + 1; + Length = 0; + continue; + } + Length++; + } + if (Length != 0) + if (Str.substr(Beg, Length) == SubStr) + return true; + + return false; +} + +UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( + ur_device_handle_t hDevice, ur_program_handle_t hProgram, + const char *pFunctionName, void **ppFunctionPointer) { + + cl_context CLContext = nullptr; + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + CL_PROGRAM_CONTEXT, sizeof(CLContext), + &CLContext, nullptr)); + + cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; + + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, + cl_ext::GetDeviceFunctionPointerName, &FuncT)); + + if (!FuncT) { + return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; + } + + // Check if the kernel name exists to prevent the OpenCL runtime from throwing + // an exception with the cpu runtime. + // TODO: Use fallback search method if the clGetDeviceFunctionPointerINTEL + // extension does not exist. Can only be done once the CPU runtime no longer + // throws exceptions. 
+ *ppFunctionPointer = 0; + size_t Size; + CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + CL_PROGRAM_KERNEL_NAMES, 0, nullptr, + &Size)); + + std::string KernelNames(Size, ' '); + + CL_RETURN_ON_FAILURE(clGetProgramInfo( + cl_adapter::cast(hProgram), CL_PROGRAM_KERNEL_NAMES, + KernelNames.size(), &KernelNames[0], nullptr)); + + // Get rid of the null terminator and search for the kernel name. If the + // function cannot be found, return an error code to indicate it does not exist. + KernelNames.pop_back(); + if (!isInSeparatedString(KernelNames, ';', pFunctionName)) { + return UR_RESULT_ERROR_INVALID_KERNEL_NAME; + } + + const cl_int CLResult = + FuncT(cl_adapter::cast(hDevice), + cl_adapter::cast(hProgram), pFunctionName, + reinterpret_cast(ppFunctionPointer)); + // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address + // cannot be found but the kernel exists. As the kernel does exist, return + // that the function name is invalid. + if (CLResult == CL_INVALID_ARG_VALUE) { + *ppFunctionPointer = 0; + return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; + } + + CL_RETURN_ON_FAILURE(CLResult); + + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp new file mode 100644 index 0000000000..8b5496e619 --- /dev/null +++ b/source/adapters/opencl/queue.cpp @@ -0,0 +1,161 @@ +//===--------- queue.cpp - OpenCL Adapter ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "common.hpp" +#include "platform.hpp" + +cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { + + switch (PropName) { + case UR_QUEUE_INFO_CONTEXT: + return CL_QUEUE_CONTEXT; + case UR_QUEUE_INFO_DEVICE: + return CL_QUEUE_DEVICE; + case UR_QUEUE_INFO_DEVICE_DEFAULT: + return CL_QUEUE_DEVICE_DEFAULT; + case UR_QUEUE_INFO_FLAGS: + return CL_QUEUE_PROPERTIES_ARRAY; + case UR_QUEUE_INFO_REFERENCE_COUNT: + return CL_QUEUE_REFERENCE_COUNT; + case UR_QUEUE_INFO_SIZE: + return CL_QUEUE_SIZE; + default: + return -1; + } +} + +cl_command_queue_properties +convertURQueuePropertiesToCL(const ur_queue_properties_t *URQueueProperties) { + cl_command_queue_properties CLCommandQueueProperties = 0; + + if (URQueueProperties->flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + CLCommandQueueProperties |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + } + if (URQueueProperties->flags & UR_QUEUE_FLAG_PROFILING_ENABLE) { + CLCommandQueueProperties |= CL_QUEUE_PROFILING_ENABLE; + } + if (URQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE) { + CLCommandQueueProperties |= CL_QUEUE_ON_DEVICE; + } + if (URQueueProperties->flags & UR_QUEUE_FLAG_ON_DEVICE_DEFAULT) { + CLCommandQueueProperties |= CL_QUEUE_ON_DEVICE_DEFAULT; + } + + return CLCommandQueueProperties; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { + + cl_platform_id CurPlatform; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + clGetDeviceInfo(cl_adapter::cast(hDevice), + CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &CurPlatform, + nullptr), + phQueue); + + cl_command_queue_properties CLProperties = + pProperties ? convertURQueuePropertiesToCL(pProperties) : 0; + + // Properties supported by OpenCL backend. 
+ const cl_command_queue_properties SupportByOpenCL = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE | + CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + + oclv::OpenCLVersion Version; + CL_RETURN_ON_FAILURE_AND_SET_NULL( + cl_adapter::getPlatformVersion(CurPlatform, Version), phQueue); + + cl_int RetErr = CL_INVALID_OPERATION; + + if (Version < oclv::V2_0) { + *phQueue = cl_adapter::cast( + clCreateCommandQueue(cl_adapter::cast(hContext), + cl_adapter::cast(hDevice), + CLProperties & SupportByOpenCL, &RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; + } + + /* TODO: Add support for CL_QUEUE_PRIORITY_KHR */ + cl_queue_properties CreationFlagProperties[] = { + CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; + *phQueue = + cl_adapter::cast(clCreateCommandQueueWithProperties( + cl_adapter::cast(hContext), + cl_adapter::cast(hDevice), CreationFlagProperties, + &RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, + ur_queue_info_t propName, + size_t propSize, + void *pPropValue, + size_t *pPropSizeRet) { + if (propName == UR_QUEUE_INFO_EMPTY) { + // OpenCL doesn't provide API to check the status of the queue. 
+ return UR_RESULT_ERROR_INVALID_VALUE; + } + + cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); + + cl_int RetErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CLCommandQueueInfo, propSize, + pPropValue, pPropSizeRet); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, + ur_native_handle_t *phNativeQueue) { + return getNativeHandle(hQueue, phNativeQueue); +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( + ur_native_handle_t hNativeQueue, + [[maybe_unused]] ur_context_handle_t hContext, + [[maybe_unused]] ur_device_handle_t hDevice, + [[maybe_unused]] const ur_queue_native_properties_t *pProperties, + ur_queue_handle_t *phQueue) { + + *phQueue = reinterpret_cast(hNativeQueue); + cl_int RetErr = + clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { + cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { + cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { + cl_int RetErr = + clRetainCommandQueue(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { + cl_int RetErr = + clReleaseCommandQueue(cl_adapter::cast(hQueue)); + CL_RETURN_ON_FAILURE(RetErr); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp new file mode 100644 index 0000000000..0cd4cbed2b --- /dev/null +++ 
b/source/adapters/opencl/sampler.cpp @@ -0,0 +1,198 @@ +//===--------- sampler.cpp - OpenCL Adapter --------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" + +namespace { + +cl_sampler_info ur2CLSamplerInfo(ur_sampler_info_t URInfo) { + switch (URInfo) { +#define CASE(UR_INFO, CL_INFO) \ + case UR_INFO: \ + return CL_INFO; + + CASE(UR_SAMPLER_INFO_REFERENCE_COUNT, CL_SAMPLER_REFERENCE_COUNT) + CASE(UR_SAMPLER_INFO_CONTEXT, CL_SAMPLER_CONTEXT) + CASE(UR_SAMPLER_INFO_NORMALIZED_COORDS, CL_SAMPLER_NORMALIZED_COORDS) + CASE(UR_SAMPLER_INFO_ADDRESSING_MODE, CL_SAMPLER_ADDRESSING_MODE) + CASE(UR_SAMPLER_INFO_FILTER_MODE, CL_SAMPLER_FILTER_MODE) + +#undef CASE + + default: + cl_adapter::die("Unhandled: ur_sampler_info_t"); + } +} + +cl_addressing_mode ur2CLAddressingMode(ur_sampler_addressing_mode_t Mode) { + switch (Mode) { + +#define CASE(UR_MODE, CL_MODE) \ + case UR_MODE: \ + return CL_MODE; + + CASE(UR_SAMPLER_ADDRESSING_MODE_NONE, CL_ADDRESS_NONE); + CASE(UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP_TO_EDGE); + CASE(UR_SAMPLER_ADDRESSING_MODE_CLAMP, CL_ADDRESS_CLAMP); + CASE(UR_SAMPLER_ADDRESSING_MODE_REPEAT, CL_ADDRESS_REPEAT); + CASE(UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT, + CL_ADDRESS_MIRRORED_REPEAT); + +#undef CASE + + default: + cl_adapter::die("Unhandled: ur_sampler_addressing_mode_t"); + } +} + +cl_filter_mode ur2CLFilterMode(ur_sampler_filter_mode_t Mode) { + switch (Mode) { + +#define CASE(UR_MODE, CL_MODE) \ + case UR_MODE: \ + return CL_MODE; + + CASE(UR_SAMPLER_FILTER_MODE_NEAREST, CL_FILTER_NEAREST) + CASE(UR_SAMPLER_FILTER_MODE_LINEAR, CL_FILTER_LINEAR) + +#undef CASE + + default: + cl_adapter::die("Unhandled: 
ur_sampler_filter_mode_t"); + } +} + +ur_sampler_addressing_mode_t cl2URAddressingMode(cl_addressing_mode Mode) { + switch (Mode) { + +#define CASE(CL_MODE, UR_MODE) \ + case CL_MODE: \ + return UR_MODE; + + CASE(CL_ADDRESS_NONE, UR_SAMPLER_ADDRESSING_MODE_NONE); + CASE(CL_ADDRESS_CLAMP_TO_EDGE, UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE); + CASE(CL_ADDRESS_CLAMP, UR_SAMPLER_ADDRESSING_MODE_CLAMP); + CASE(CL_ADDRESS_REPEAT, UR_SAMPLER_ADDRESSING_MODE_REPEAT); + CASE(CL_ADDRESS_MIRRORED_REPEAT, + UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT); + +#undef CASE + + default: + cl_adapter::die("Unhandled: cl_addressing_mode"); + } +} + +ur_sampler_filter_mode_t cl2URFilterMode(cl_filter_mode Mode) { + switch (Mode) { +#define CASE(CL_MODE, UR_MODE) \ + case CL_MODE: \ + return UR_MODE; + + CASE(CL_FILTER_NEAREST, UR_SAMPLER_FILTER_MODE_NEAREST) + CASE(CL_FILTER_LINEAR, UR_SAMPLER_FILTER_MODE_LINEAR); + +#undef CASE + + default: + cl_adapter::die("Unhandled: cl_filter_mode"); + } +} + +void cl2URSamplerInfoValue(cl_sampler_info Info, void *InfoValue) { + if (!InfoValue) { + return; + } + switch (Info) { + case CL_SAMPLER_ADDRESSING_MODE: { + cl_addressing_mode CLValue = + *reinterpret_cast(InfoValue); + *reinterpret_cast(InfoValue) = + cl2URAddressingMode(CLValue); + break; + } + case CL_SAMPLER_FILTER_MODE: { + cl_filter_mode CLMode = *reinterpret_cast(InfoValue); + *reinterpret_cast(InfoValue) = + cl2URFilterMode(CLMode); + break; + } + + default: + break; + } +} + +} // namespace + +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler) { + + // Initialize properties according to OpenCL 2.1 spec. 
+ ur_result_t ErrorCode; + cl_addressing_mode AddressingMode = + ur2CLAddressingMode(pDesc->addressingMode); + cl_filter_mode FilterMode = ur2CLFilterMode(pDesc->filterMode); + + // Always call OpenCL 1.0 API + *phSampler = cl_adapter::cast(clCreateSampler( + cl_adapter::cast(hContext), + static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, + cl_adapter::cast(&ErrorCode))); + + return mapCLErrorToUR(ErrorCode); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + cl_sampler_info SamplerInfo = ur2CLSamplerInfo(propName); + static_assert(sizeof(cl_addressing_mode) == + sizeof(ur_sampler_addressing_mode_t)); + + if (ur_result_t Err = mapCLErrorToUR( + clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, + propSize, pPropValue, pPropSizeRet))) { + return Err; + } + // Convert OpenCL returns to UR + cl2URSamplerInfoValue(SamplerInfo, pPropValue); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerRetain(ur_sampler_handle_t hSampler) { + return mapCLErrorToUR( + clRetainSampler(cl_adapter::cast(hSampler))); +} + +UR_APIEXPORT ur_result_t UR_APICALL +urSamplerRelease(ur_sampler_handle_t hSampler) { + return mapCLErrorToUR( + clReleaseSampler(cl_adapter::cast(hSampler))); +} + +UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( + ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { + *phNativeSampler = reinterpret_cast( + cl_adapter::cast(hSampler)); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( + ur_native_handle_t hNativeSampler, ur_context_handle_t, + const ur_sampler_native_properties_t *, ur_sampler_handle_t *phSampler) { + *phSampler = reinterpret_cast( + cl_adapter::cast(hNativeSampler)); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/opencl/ur_interface_loader.cpp 
b/source/adapters/opencl/ur_interface_loader.cpp new file mode 100644 index 0000000000..32d26cf58c --- /dev/null +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -0,0 +1,384 @@ +//===--------- ur_interface_loader.cpp - Unified Runtime ------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace { + +// TODO - this is a duplicate of what is in the L0 plugin +// We should move this to somewhere common +ur_result_t validateProcInputs(ur_api_version_t Version, void *pDdiTable) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + // Pre 1.0 we enforce loader and adapter must have same version. + // Post 1.0 only major version match should be required. + if (Version != UR_API_VERSION_CURRENT) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + return UR_RESULT_SUCCESS; +} +} // namespace + +#if defined(__cplusplus) +extern "C" { +#endif + +UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( + ur_api_version_t Version, ur_platform_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; + pDdiTable->pfnGet = urPlatformGet; + pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; + pDdiTable->pfnGetInfo = urPlatformGetInfo; + pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; + pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( + ur_api_version_t Version, ur_context_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if 
(UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreate = urContextCreate; + pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urContextGetInfo; + pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; + pDdiTable->pfnRelease = urContextRelease; + pDdiTable->pfnRetain = urContextRetain; + pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( + ur_api_version_t Version, ur_event_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urEventGetInfo; + pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; + pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; + pDdiTable->pfnRelease = urEventRelease; + pDdiTable->pfnRetain = urEventRetain; + pDdiTable->pfnSetCallback = urEventSetCallback; + pDdiTable->pfnWait = urEventWait; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( + ur_api_version_t Version, ur_program_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnBuild = urProgramBuild; + pDdiTable->pfnCompile = urProgramCompile; + pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; + pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; + pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; + pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; + pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer; + pDdiTable->pfnGetInfo = urProgramGetInfo; + pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; + pDdiTable->pfnLink = urProgramLink; + pDdiTable->pfnRelease = urProgramRelease; + pDdiTable->pfnRetain = urProgramRetain; + 
pDdiTable->pfnSetSpecializationConstants = + urProgramSetSpecializationConstants; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( + ur_api_version_t Version, ur_kernel_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreate = urKernelCreate; + pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; + pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; + pDdiTable->pfnGetInfo = urKernelGetInfo; + pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; + pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; + pDdiTable->pfnRelease = urKernelRelease; + pDdiTable->pfnRetain = urKernelRetain; + pDdiTable->pfnSetArgLocal = urKernelSetArgLocal; + pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; + pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; + pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; + pDdiTable->pfnSetArgValue = urKernelSetArgValue; + pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; + pDdiTable->pfnSetSpecializationConstants = nullptr; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( + ur_api_version_t Version, ur_sampler_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreate = urSamplerCreate; + pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urSamplerGetInfo; + pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; + pDdiTable->pfnRelease = urSamplerRelease; + pDdiTable->pfnRetain = urSamplerRetain; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urGetMemProcAddrTable(ur_api_version_t Version, ur_mem_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + 
pDdiTable->pfnBufferCreate = urMemBufferCreate; + pDdiTable->pfnBufferPartition = urMemBufferPartition; + pDdiTable->pfnBufferCreateWithNativeHandle = + urMemBufferCreateWithNativeHandle; + pDdiTable->pfnGetInfo = urMemGetInfo; + pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; + pDdiTable->pfnImageCreate = urMemImageCreate; + pDdiTable->pfnImageGetInfo = urMemImageGetInfo; + pDdiTable->pfnRelease = urMemRelease; + pDdiTable->pfnRetain = urMemRetain; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( + ur_api_version_t Version, ur_enqueue_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; + pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; + pDdiTable->pfnEventsWait = urEnqueueEventsWait; + pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; + pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; + pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy; + pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect; + pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill; + pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap; + pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead; + pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect; + pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite; + pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect; + pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy; + pDdiTable->pfnMemImageRead = urEnqueueMemImageRead; + pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite; + pDdiTable->pfnMemUnmap = urEnqueueMemUnmap; + pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; + pDdiTable->pfnUSMFill = urEnqueueUSMFill; + pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; + pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; + pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; + 
pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( + ur_api_version_t Version, ur_global_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnInit = urInit; + pDdiTable->pfnTearDown = urTearDown; + pDdiTable->pfnAdapterGet = urAdapterGet; + pDdiTable->pfnAdapterRelease = urAdapterRelease; + pDdiTable->pfnAdapterRetain = urAdapterRetain; + pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; + pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( + ur_api_version_t Version, ur_queue_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreate = urQueueCreate; + pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; + pDdiTable->pfnFinish = urQueueFinish; + pDdiTable->pfnFlush = urQueueFlush; + pDdiTable->pfnGetInfo = urQueueGetInfo; + pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; + pDdiTable->pfnRelease = urQueueRelease; + pDdiTable->pfnRetain = urQueueRetain; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL +urGetUSMProcAddrTable(ur_api_version_t Version, ur_usm_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; + pDdiTable->pfnFree = urUSMFree; + pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; + pDdiTable->pfnHostAlloc = urUSMHostAlloc; + pDdiTable->pfnPoolCreate = nullptr; + pDdiTable->pfnPoolRetain = nullptr; + pDdiTable->pfnPoolRelease = nullptr; + pDdiTable->pfnPoolGetInfo = nullptr; + pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t 
UR_APICALL urGetUSMExpProcAddrTable( + ur_api_version_t Version, ur_usm_exp_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + + pDdiTable->pfnImportExp = urUSMImportExp; + pDdiTable->pfnReleaseExp = urUSMReleaseExp; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( + ur_api_version_t Version, ur_device_dditable_t *pDdiTable) { + auto Result = validateProcInputs(Version, pDdiTable); + if (UR_RESULT_SUCCESS != Result) { + return Result; + } + pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; + pDdiTable->pfnGet = urDeviceGet; + pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; + pDdiTable->pfnGetInfo = urDeviceGetInfo; + pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; + pDdiTable->pfnPartition = urDevicePartition; + pDdiTable->pfnRelease = urDeviceRelease; + pDdiTable->pfnRetain = urDeviceRetain; + pDdiTable->pfnSelectBinary = urDeviceSelectBinary; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( + ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnCreateExp = urCommandBufferCreateExp; + pDdiTable->pfnRetainExp = urCommandBufferRetainExp; + pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; + pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; + pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; + pDdiTable->pfnAppendMemcpyUSMExp = urCommandBufferAppendMemcpyUSMExp; + pDdiTable->pfnAppendMembufferCopyExp = urCommandBufferAppendMembufferCopyExp; + pDdiTable->pfnAppendMembufferCopyRectExp = + urCommandBufferAppendMembufferCopyRectExp; + pDdiTable->pfnAppendMembufferReadExp = urCommandBufferAppendMembufferReadExp; + 
pDdiTable->pfnAppendMembufferReadRectExp = + urCommandBufferAppendMembufferReadRectExp; + pDdiTable->pfnAppendMembufferWriteExp = + urCommandBufferAppendMembufferWriteExp; + pDdiTable->pfnAppendMembufferWriteRectExp = + urCommandBufferAppendMembufferWriteRectExp; + pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; + + return retVal; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; + + return retVal; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + pDdiTable->pfnUnsampledImageHandleDestroyExp = + urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportOpaqueFDExp = urBindlessImagesImportOpaqueFDExp; + pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; + 
pDdiTable->pfnReleaseInteropExp = urBindlessImagesReleaseInteropExp; + pDdiTable->pfnImportExternalSemaphoreOpaqueFDExp = + urBindlessImagesImportExternalSemaphoreOpaqueFDExp; + pDdiTable->pfnDestroyExternalSemaphoreExp = + urBindlessImagesDestroyExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + urBindlessImagesSignalExternalSemaphoreExp; + return UR_RESULT_SUCCESS; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( + ur_api_version_t version, ur_virtual_mem_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + + pDdiTable->pfnFree = nullptr; + pDdiTable->pfnGetInfo = nullptr; + pDdiTable->pfnGranularityGetInfo = nullptr; + pDdiTable->pfnMap = nullptr; + pDdiTable->pfnReserve = nullptr; + pDdiTable->pfnSetAccess = nullptr; + pDdiTable->pfnUnmap = nullptr; + + return retVal; +} + +UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( + ur_api_version_t version, ur_physical_mem_dditable_t *pDdiTable) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + + pDdiTable->pfnCreate = nullptr; + pDdiTable->pfnRelease = nullptr; + pDdiTable->pfnRetain = nullptr; + + return retVal; +} + +#if defined(__cplusplus) +} // extern "C" +#endif diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp new file mode 100644 index 0000000000..afa22ffbb9 --- /dev/null +++ b/source/adapters/opencl/usm.cpp @@ -0,0 +1,431 @@ +//===--------- usm.cpp - OpenCL Adapter -------------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t, size_t size, void **ppMem) { + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + cl_mem_properties_intel Properties[3]; + + if (pUSMDesc && pUSMDesc->pNext && + static_cast(pUSMDesc->pNext)->stype == + UR_STRUCTURE_TYPE_USM_HOST_DESC) { + const auto *HostDesc = + static_cast(pUSMDesc->pNext); + + if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; + Properties[1] = Flags; + Properties[2] = 0; + } else { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clHostMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::HostMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, Properties, size, Alignment, + cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + // ensure we aligned the allocation correctly + if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) + assert(reinterpret_cast(*ppMem) % Alignment == 0 && + "allocation not aligned correctly"); + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, + size_t size, void **ppMem) { + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? 
pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + cl_mem_properties_intel Properties[3]; + if (pUSMDesc && pUSMDesc->pNext && + static_cast(pUSMDesc->pNext)->stype == + UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { + const auto *HostDesc = + static_cast(pUSMDesc->pNext); + + if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (HostDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + Properties[0] = CL_MEM_ALLOC_FLAGS_INTEL; + Properties[1] = Flags; + Properties[2] = 0; + } else { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clDeviceMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + cl_ext::DeviceMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), + cl_adapter::cast(Properties), size, + Alignment, cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + // ensure we aligned the allocation correctly + if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) + assert(reinterpret_cast(*ppMem) % Alignment == 0 && + "allocation not aligned correctly"); + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, + size_t size, void **ppMem) { + + void *Ptr = nullptr; + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; + + cl_mem_alloc_flags_intel Flags = 0; + const auto *NextStruct = + (pUSMDesc ? 
static_cast(pUSMDesc->pNext) + : nullptr); + while (NextStruct) { + if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_HOST_DESC) { + const auto *HostDesc = + reinterpret_cast(NextStruct); + if (HostDesc->flags & UR_USM_HOST_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL; + } + } else if (NextStruct->stype == UR_STRUCTURE_TYPE_USM_DEVICE_DESC) { + const auto *DevDesc = + reinterpret_cast(NextStruct); + if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_INITIAL_PLACEMENT) { + Flags |= CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL; + } + if (DevDesc->flags & UR_USM_DEVICE_MEM_FLAG_WRITE_COMBINED) { + Flags |= CL_MEM_ALLOC_WRITE_COMBINED_INTEL; + } + } + NextStruct = static_cast(NextStruct->pNext); + } + + cl_mem_properties_intel Properties[3] = {CL_MEM_ALLOC_FLAGS_INTEL, Flags, 0}; + + // Passing a flags value of 0 doesn't work, so truncate the properties + if (Flags == 0) { + Properties[0] = 0; + } + + // First we need to look up the function pointer + clSharedMemAllocINTEL_fn FuncPtr = nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + cl_ext::SharedMemAllocName, &FuncPtr); + + if (FuncPtr) { + Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), + cl_adapter::cast(Properties), size, + Alignment, cl_adapter::cast(&RetVal)); + } + + *ppMem = Ptr; + + assert(Alignment == 0 || + (RetVal == UR_RESULT_SUCCESS && + reinterpret_cast(*ppMem) % Alignment == 0)); + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, + void *pMem) { + + // Use a blocking free to avoid issues with indirect access from kernels that + // might be still running. 
+ clMemBlockingFreeINTEL_fn FuncPtr = nullptr; + + cl_context CLContext = cl_adapter::cast(hContext); + ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; + RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, + cl_ext::MemBlockingFreeName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR(FuncPtr(CLContext, pMem)); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( + ur_queue_handle_t hQueue, void *ptr, size_t patternSize, + const void *pPattern, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + // Have to look up the context from the queue + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return mapCLErrorToUR(CLErr); + } + + clEnqueueMemFillINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, + cl_ext::EnqueueMemFillName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR( + FuncPtr(cl_adapter::cast(hQueue), ptr, pPattern, + patternSize, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( + ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + + // Have to look up the context from the queue + cl_context CLContext; + cl_int CLErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return mapCLErrorToUR(CLErr); + } + + clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = cl_ext::getExtFuncFromContext( 
+ CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + cl_ext::EnqueueMemcpyName, &FuncPtr); + + if (FuncPtr) { + RetVal = mapCLErrorToUR( + FuncPtr(cl_adapter::cast(hQueue), blocking, pDst, + pSrc, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( + ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, + [[maybe_unused]] size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + + // flags is currently unused so fail if set + if (flags != 0) + return UR_RESULT_ERROR_INVALID_VALUE; + + return mapCLErrorToUR(clEnqueueMarkerWithWaitList( + cl_adapter::cast(hQueue), numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + /* + // Use this once impls support it. + // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, sizeof(cl_context), + &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return map_cl_error_to_ur(CLErr); + } + + clEnqueueMigrateMemINTEL_fn FuncPtr; + ur_result_t Err = cl_ext::getExtFuncFromContext( + CLContext, "clEnqueueMigrateMemINTEL", &FuncPtr); + + ur_result_t RetVal; + if (Err != UR_RESULT_SUCCESS) { + RetVal = Err; + } else { + RetVal = map_cl_error_to_ur( + FuncPtr(cl_adapter::cast(hQueue), pMem, size, flags, + numEventsInWaitList, + reinterpret_cast(phEventWaitList), + reinterpret_cast(phEvent))); + } + */ +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( + ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, + [[maybe_unused]] size_t size, [[maybe_unused]] ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) { + + return mapCLErrorToUR(clEnqueueMarkerWithWaitList( + cl_adapter::cast(hQueue), 0, nullptr, + reinterpret_cast(phEvent))); + 
+ /* + // Change to use this once drivers support it. + // Have to look up the context from the kernel + cl_context CLContext; + cl_int CLErr = + clGetCommandQueueInfo(cl_adapter::cast(hQueue), + CL_QUEUE_CONTEXT, + sizeof(cl_context), + &CLContext, nullptr); + if (CLErr != CL_SUCCESS) { + return map_cl_error_to_ur(CLErr); + } + + clEnqueueMemAdviseINTEL_fn FuncPtr; + ur_result_t Err = + cl_ext::getExtFuncFromContext( + CLContext, "clEnqueueMemAdviseINTEL", &FuncPtr); + + ur_result_t RetVal; + if (Err != UR_RESULT_SUCCESS) { + RetVal = Err; + } else { + RetVal = + map_cl_error_to_ur(FuncPtr(cl_adapter::cast(hQueue), pMem, + size, advice, 0, nullptr, reinterpret_cast(phEvent))); + } + */ +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] void *pMem, + [[maybe_unused]] size_t pitch, [[maybe_unused]] size_t patternSize, + [[maybe_unused]] const void *pPattern, [[maybe_unused]] size_t width, + [[maybe_unused]] size_t height, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_INVALID_OPERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( + [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] bool blocking, + [[maybe_unused]] void *pDst, [[maybe_unused]] size_t dstPitch, + [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t srcPitch, + [[maybe_unused]] size_t width, [[maybe_unused]] size_t height, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_INVALID_OPERATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, + ur_usm_alloc_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + + clGetMemAllocInfoINTEL_fn FuncPtr 
= nullptr; + cl_context CLContext = cl_adapter::cast(hContext); + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, + cl_ext::GetMemAllocInfoName, &FuncPtr); + + cl_mem_info_intel PropNameCL; + switch (propName) { + case UR_USM_ALLOC_INFO_TYPE: + PropNameCL = CL_MEM_ALLOC_TYPE_INTEL; + break; + case UR_USM_ALLOC_INFO_BASE_PTR: + PropNameCL = CL_MEM_ALLOC_BASE_PTR_INTEL; + break; + case UR_USM_ALLOC_INFO_SIZE: + PropNameCL = CL_MEM_ALLOC_SIZE_INTEL; + break; + case UR_USM_ALLOC_INFO_DEVICE: + PropNameCL = CL_MEM_ALLOC_DEVICE_INTEL; + break; + default: + return UR_RESULT_ERROR_INVALID_VALUE; + } + + if (FuncPtr) { + RetVal = + mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), pMem, + PropNameCL, propSize, pPropValue, pPropSizeRet)); + if (RetVal == UR_RESULT_SUCCESS && pPropValue && + propName == UR_USM_ALLOC_INFO_TYPE) { + auto *AllocTypeCL = + static_cast(pPropValue); + ur_usm_type_t AllocTypeUR; + switch (*AllocTypeCL) { + case CL_MEM_TYPE_HOST_INTEL: + AllocTypeUR = UR_USM_TYPE_HOST; + break; + case CL_MEM_TYPE_DEVICE_INTEL: + AllocTypeUR = UR_USM_TYPE_DEVICE; + break; + case CL_MEM_TYPE_SHARED_INTEL: + AllocTypeUR = UR_USM_TYPE_SHARED; + break; + case CL_MEM_TYPE_UNKNOWN_INTEL: + default: + AllocTypeUR = UR_USM_TYPE_UNKNOWN; + break; + } + auto *AllocTypeOut = static_cast(pPropValue); + *AllocTypeOut = AllocTypeUR; + } + } + + return RetVal; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMImportExp([[maybe_unused]] ur_context_handle_t Context, + [[maybe_unused]] void *HostPtr, [[maybe_unused]] size_t Size) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMReleaseExp([[maybe_unused]] ur_context_handle_t Context, + [[maybe_unused]] void *HostPtr) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/usm_p2p.cpp b/source/adapters/opencl/usm_p2p.cpp new file mode 100644 index 0000000000..b0f51eac2b --- /dev/null +++ 
b/source/adapters/opencl/usm_p2p.cpp @@ -0,0 +1,41 @@ +//===--------- usm_p2p.cpp - OpenCL Adapter-------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.hpp" + +UR_APIEXPORT ur_result_t UR_APICALL +urUsmP2PEnablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urUsmP2PDisablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( + [[maybe_unused]] ur_device_handle_t commandDevice, + [[maybe_unused]] ur_device_handle_t peerDevice, + [[maybe_unused]] ur_exp_peer_info_t propName, + [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue, + [[maybe_unused]] size_t *pPropSizeRet) { + + cl_adapter::die( + "Experimental P2P feature is not implemented for OpenCL adapter."); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index 46960a3a51..e90b74f4cd 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -53,9 +53,12 @@ function(add_conformance_test name) if(UR_BUILD_ADAPTER_L0) add_test_adapter(${name} adapter_level_zero) endif() + if(UR_BUILD_ADAPTER_OPENCL) + add_test_adapter(${name} adapter_opencl) + endif() - if(NOT (UR_BUILD_ADAPTER_CUDA - 
OR UR_BUILD_ADAPTER_HIP OR UR_BUILD_ADAPTER_L0)) + if(NOT (UR_BUILD_ADAPTER_CUDA OR UR_BUILD_ADAPTER_HIP + OR UR_BUILD_ADAPTER_L0 OR UR_BUILD_ADAPTER_OPENCL)) add_test_adapter(${name} adapter_null) endif() endif() diff --git a/test/conformance/context/context_adapter_hip.match b/test/conformance/context/context_adapter_hip.match index 0add99006b..129b8d392c 100644 --- a/test/conformance/context/context_adapter_hip.match +++ b/test/conformance/context/context_adapter_hip.match @@ -1,2 +1 @@ urContextCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urContextSetExtendedDeleterTest.Success/AMD_HIP_BACKEND___{{.*}}_