diff --git a/source/adapters/opencl/adapter.hpp b/source/adapters/opencl/adapter.hpp index fc7c2e2fe8..2b17762de7 100644 --- a/source/adapters/opencl/adapter.hpp +++ b/source/adapters/opencl/adapter.hpp @@ -7,6 +7,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "logger/ur_logger.hpp" +#include "platform.hpp" #include "CL/cl.h" #include "logger/ur_logger.hpp" @@ -18,6 +20,9 @@ struct ur_adapter_handle_t_ { std::mutex Mutex; logger::Logger &log = logger::get_logger("opencl"); + std::vector> URPlatforms; + uint32_t NumPlatforms = 0; + // Function pointers to core OpenCL entry points which may not exist in older // versions of the OpenCL-ICD-Loader are tracked here and initialized by // dynamically loading the symbol by name. diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 15029d5e27..b671937dd6 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -10,6 +10,11 @@ #include "command_buffer.hpp" #include "common.hpp" +#include "context.hpp" +#include "event.hpp" +#include "kernel.hpp" +#include "memory.hpp" +#include "queue.hpp" namespace { ur_result_t @@ -41,7 +46,7 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { urQueueRelease(hInternalQueue); - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->CLContext; cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( @@ -61,7 +66,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_queue_handle_t Queue = nullptr; UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue)); - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->CLContext; cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -72,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; - cl_device_id CLDevice = cl_adapter::cast(hDevice); + cl_device_id CLDevice = hDevice->CLDevice; CL_RETURN_ON_FAILURE( getDeviceCommandBufferUpdateCapabilities(CLDevice, UpdateCapabilities)); bool DeviceSupportsUpdate = UpdateCapabilities > 0; @@ -86,16 +91,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0}; cl_int Res = CL_SUCCESS; - auto CLCommandBuffer = clCreateCommandBufferKHR( - 1, cl_adapter::cast(&Queue), Properties, &Res); + const cl_command_queue CLQueue = Queue->CLQueue; + auto CLCommandBuffer = + clCreateCommandBufferKHR(1, &CLQueue, Properties, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); try { auto URCommandBuffer = std::make_unique( Queue, hContext, CLCommandBuffer, IsUpdatable); *phCommandBuffer = URCommandBuffer.release(); - } catch (...) { + } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } CL_RETURN_ON_FAILURE(Res); @@ -124,7 +132,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -156,7 +164,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( UR_ASSERT(!(phCommandHandle && !hCommandBuffer->IsUpdatable), UR_RESULT_ERROR_INVALID_OPERATION); - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -178,10 +186,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( cl_command_properties_khr *Properties = hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr; CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, Properties, - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, OutCommandHandle)); + hCommandBuffer->CLCommandBuffer, nullptr, Properties, hKernel->CLKernel, + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + OutCommandHandle)); try { auto URCommandHandle = @@ -242,7 +250,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( (void)phEventWaitList; (void)phEvent; (void)phCommand; - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -250,10 +258,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR)); CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), - srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory, + hDstMem->CLMemory, srcOffset, dstOffset, size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } @@ -280,7 +287,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; size_t OpenCLRegion[3]{region.width, region.height, region.depth}; - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -288,11 +295,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR)); CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), - OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch, - dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory, + hDstMem->CLMemory, OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, + srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } @@ -384,7 +390,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( [[maybe_unused]] ur_event_handle_t *phEvent, [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -392,9 +398,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( cl_ext::CommandFillBufferName, &clCommandFillBufferKHR)); CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hBuffer->CLMemory, + pPattern, patternSize, offset, size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } @@ -450,7 +456,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -458,13 +464,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR)); const uint32_t NumberOfQueues = 1; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; + } + cl_command_queue CLQueue = hQueue->CLQueue; CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR( - NumberOfQueues, cl_adapter::cast(&hQueue), - hCommandBuffer->CLCommandBuffer, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -517,11 +536,11 @@ void updateKernelArgs(std::vector &CLArgs, for (uint32_t i = 0; i < NumMemobjArgs; i++) { const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg = ArgMemobjList[i]; + cl_mem arg_value = URMemObjArg.hNewMemObjArg->CLMemory; cl_mutable_dispatch_arg_khr CLArg{ URMemObjArg.argIndex, // arg_index sizeof(cl_mem), // arg_size - cl_adapter::cast( - &URMemObjArg.hNewMemObjArg) // arg_value + &arg_value // arg_value }; CLArgs.push_back(CLArg); @@ -552,7 +571,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; UR_RETURN_ON_FAILURE( @@ -602,8 +621,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( updateNDRange(CLLocalWorkSize, LocalWorkSizePtr); } - cl_mutable_command_khr command = - cl_adapter::cast(hCommand->CLMutableCommand); + cl_mutable_command_khr command = hCommand->CLMutableCommand; cl_mutable_dispatch_config_khr dispatch_config = { command, static_cast(CLArgs.size()), // num_args diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index e21f78af6b..4e550a4bbc 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -158,20 +158,6 @@ extern thread_local char ErrorMessage[MaxMessageSize]; ur_result_t ErrorCode); [[noreturn]] void die(const char *Message); - -template To cast(From Value) { - - if constexpr (std::is_pointer_v) { - static_assert(std::is_pointer_v == std::is_pointer_v, - "Cast failed pointer check"); - return reinterpret_cast(Value); - } else { - static_assert(sizeof(From) == sizeof(To), "Cast failed size check"); - static_assert(std::is_signed_v == std::is_signed_v, - "Cast failed sign check"); - return static_cast(Value); - } -} } // namespace cl_adapter namespace cl_ext { diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index c2c38aa753..54d5e4a035 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -15,25 +15,50 @@ #include #include -ur_result_t cl_adapter::getDevicesFromContext( - ur_context_handle_t hContext, - std::unique_ptr> &DevicesInCtx) { - - cl_uint DeviceCount; - CL_RETURN_ON_FAILURE(clGetContextInfo(cl_adapter::cast(hContext), - CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), - &DeviceCount, nullptr)); +ur_result_t +ur_context_handle_t_::makeWithNative(native_type Ctx, uint32_t DevCount, + const ur_device_handle_t *phDevices, + ur_context_handle_t &Context) { + try { + uint32_t CLDeviceCount; + CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_NUM_DEVICES, + sizeof(CLDeviceCount), &CLDeviceCount, + nullptr)); + std::vector CLDevices(CLDeviceCount); + CL_RETURN_ON_FAILURE(clGetContextInfo( + Ctx, CL_CONTEXT_DEVICES, sizeof(CLDevices), CLDevices.data(), nullptr)); + std::vector URDevices; + if (DevCount) { + if (DevCount != CLDeviceCount) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + for (uint32_t i = 0; i < DevCount; i++) { + if (phDevices[i]->CLDevice != CLDevices[i]) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + URDevices.push_back(phDevices[i]); + } + } else { + DevCount = CLDeviceCount; + for (uint32_t i = 0; i < CLDeviceCount; i++) { + ur_device_handle_t UrDevice = nullptr; + ur_native_handle_t hNativeHandle = + reinterpret_cast(CLDevices[i]); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( + hNativeHandle, nullptr, nullptr, &UrDevice)); + URDevices.push_back(UrDevice); + } + } - if (DeviceCount < 1) { - return UR_RESULT_ERROR_INVALID_CONTEXT; + auto URContext = + std::make_unique(Ctx, DevCount, URDevices.data()); + Context = URContext.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } - DevicesInCtx = std::make_unique>(DeviceCount); - - CL_RETURN_ON_FAILURE(clGetContextInfo( - cl_adapter::cast(hContext), CL_CONTEXT_DEVICES, - DeviceCount * sizeof(cl_device_id), (*DevicesInCtx).data(), nullptr)); - return UR_RESULT_SUCCESS; } @@ -42,32 +67,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( const ur_context_properties_t *, ur_context_handle_t *phContext) { cl_int Ret; - *phContext = cl_adapter::cast( - clCreateContext(nullptr, cl_adapter::cast(DeviceCount), - cl_adapter::cast(phDevices), - nullptr, nullptr, cl_adapter::cast(&Ret))); - - return mapCLErrorToUR(Ret); -} + std::vector CLDevices(DeviceCount); + for (size_t i = 0; i < DeviceCount; i++) { + CLDevices[i] = phDevices[i]->CLDevice; + } -static cl_int mapURContextInfoToCL(ur_context_info_t URPropName) { - - cl_int CLPropName; - switch (URPropName) { - case UR_CONTEXT_INFO_NUM_DEVICES: - CLPropName = CL_CONTEXT_NUM_DEVICES; - break; - case UR_CONTEXT_INFO_DEVICES: - CLPropName = CL_CONTEXT_DEVICES; - break; - case UR_CONTEXT_INFO_REFERENCE_COUNT: - CLPropName = CL_CONTEXT_REFERENCE_COUNT; - break; - default: - CLPropName = -1; + try { + cl_context Ctx = clCreateContext(nullptr, static_cast(DeviceCount), + CLDevices.data(), nullptr, nullptr, + static_cast(&Ret)); + CL_RETURN_ON_FAILURE(Ret); + auto URContext = + std::make_unique(Ctx, DeviceCount, phDevices); + *phContext = URContext.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } - return CLPropName; + return mapCLErrorToUR(Ret); } UR_APIEXPORT ur_result_t UR_APICALL @@ -75,7 +94,6 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - const cl_int CLPropName = mapURContextInfoToCL(propName); switch (static_cast(propName)) { /* 2D USM memops are not supported. */ @@ -91,21 +109,14 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, * queries of each device separately and building the intersection set. */ return UR_RESULT_ERROR_INVALID_ARGUMENT; } - case UR_CONTEXT_INFO_NUM_DEVICES: - case UR_CONTEXT_INFO_DEVICES: + case UR_CONTEXT_INFO_NUM_DEVICES: { + return ReturnValue(hContext->DeviceCount); + } + case UR_CONTEXT_INFO_DEVICES: { + return ReturnValue(&hContext->Devices[0], hContext->DeviceCount); + } case UR_CONTEXT_INFO_REFERENCE_COUNT: { - size_t CheckPropSize = 0; - auto ClResult = - clGetContextInfo(cl_adapter::cast(hContext), CLPropName, - propSize, pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; - } - return UR_RESULT_SUCCESS; + return ReturnValue(hContext->getReferenceCount()); } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -119,51 +130,47 @@ urContextRelease(ur_context_handle_t hContext) { // should drastically reduce the chances of the pathological case described // in the comments in common.hpp. static std::mutex contextReleaseMutex; - auto clContext = cl_adapter::cast(hContext); - - { - std::lock_guard lock(contextReleaseMutex); - size_t refCount = 0; - CL_RETURN_ON_FAILURE(clGetContextInfo(clContext, CL_CONTEXT_REFERENCE_COUNT, - sizeof(size_t), &refCount, nullptr)); - - // ExtFuncPtrCache is destroyed in an atexit() callback, so it doesn't - // necessarily outlive the adapter (or all the contexts). - if (refCount == 1 && cl_ext::ExtFuncPtrCache) { - cl_ext::ExtFuncPtrCache->clearCache(clContext); - } + auto clContext = hContext->CLContext; + + std::lock_guard lock(contextReleaseMutex); + size_t refCount = hContext->getReferenceCount(); + // ExtFuncPtrCache is destroyed in an atexit() callback, so it doesn't + // necessarily outlive the adapter (or all the contexts). + if (refCount == 1 && cl_ext::ExtFuncPtrCache) { + cl_ext::ExtFuncPtrCache->clearCache(clContext); } - CL_RETURN_ON_FAILURE( - clReleaseContext(cl_adapter::cast(hContext))); + if (hContext->decrementReferenceCount() == 0) { + delete hContext; + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { - - cl_int Ret = clRetainContext(cl_adapter::cast(hContext)); - return mapCLErrorToUR(Ret); + hContext->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { - *phNativeContext = reinterpret_cast(hContext); + *phNativeContext = reinterpret_cast(hContext->CLContext); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( - ur_native_handle_t hNativeContext, ur_adapter_handle_t, uint32_t, - const ur_device_handle_t *, + ur_native_handle_t hNativeContext, ur_adapter_handle_t, uint32_t numDevices, + const ur_device_handle_t *phDevices, const ur_context_native_properties_t *pProperties, ur_context_handle_t *phContext) { - *phContext = reinterpret_cast(hNativeContext); - if (!pProperties || !pProperties->isNativeHandleOwned) { - return urContextRetain(*phContext); - } + cl_context NativeHandle = reinterpret_cast(hNativeContext); + UR_RETURN_ON_FAILURE(ur_context_handle_t_::makeWithNative( + NativeHandle, numDevices, phDevices, *phContext)); + (*phContext)->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; return UR_RESULT_SUCCESS; } @@ -221,7 +228,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( C->execute(); }; CL_RETURN_ON_FAILURE(ur::cl::getAdapter()->clSetContextDestructorCallback( - cl_adapter::cast(hContext), ClCallback, Callback)); + hContext->CLContext, ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 5319f68b55..a71f6adc05 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -10,9 +10,43 @@ #pragma once #include "common.hpp" +#include "device.hpp" -namespace cl_adapter { -ur_result_t -getDevicesFromContext(ur_context_handle_t hContext, - std::unique_ptr> &DevicesInCtx); -} +#include + +struct ur_context_handle_t_ { + using native_type = cl_context; + native_type CLContext; + std::vector Devices; + uint32_t DeviceCount; + std::atomic RefCount = 0; + bool IsNativeHandleOwned = true; + + ur_context_handle_t_(native_type Ctx, uint32_t DevCount, + const ur_device_handle_t *phDevices) + : CLContext(Ctx), DeviceCount(DevCount) { + for (uint32_t i = 0; i < DeviceCount; i++) { + Devices.emplace_back(phDevices[i]); + urDeviceRetain(phDevices[i]); + } + RefCount = 1; + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount, + const ur_device_handle_t *phDevices, + ur_context_handle_t &Context); + ~ur_context_handle_t_() { + for (uint32_t i = 0; i < DeviceCount; i++) { + urDeviceRelease(Devices[i]); + } + if (IsNativeHandleOwned) { + clReleaseContext(CLContext); + } + } +}; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index e17211826f..80204690ca 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -13,68 +13,9 @@ #include #include -ur_result_t cl_adapter::getDeviceVersion(cl_device_id Dev, - oclv::OpenCLVersion &Version) { - - size_t DevVerSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); - - std::string DevVer(DevVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_VERSION, DevVerSize, - DevVer.data(), nullptr)); - - Version = oclv::OpenCLVersion(DevVer); - if (!Version.isValid()) { - return UR_RESULT_ERROR_INVALID_DEVICE; - } - - return UR_RESULT_SUCCESS; -} - -static bool isIntelFPGAEmuDevice(cl_device_id Dev) { - size_t NameSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_NAME, 0, nullptr, &NameSize)); - std::string NameStr(NameSize, '\0'); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_NAME, NameSize, NameStr.data(), nullptr)); - - return NameStr.find("Intel(R) FPGA Emulation Device") != std::string::npos; -} - -ur_result_t cl_adapter::checkDeviceExtensions( - cl_device_id Dev, const std::vector &Exts, bool &Supported) { - size_t ExtSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); - - std::string ExtStr(ExtSize, '\0'); - - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, - ExtStr.data(), nullptr)); - - Supported = true; - for (const std::string &Ext : Exts) { - if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { - // The Intel FPGA emulation device does actually support these, even if it - // doesn't report them. - if (isIntelFPGAEmuDevice(Dev) && - (Ext == "cl_intel_device_attribute_query" || - Ext == "cl_intel_required_subgroup_size")) { - Supported = true; - continue; - } - break; - } - } - - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, ur_device_type_t DeviceType, - uint32_t NumEntries, + uint32_t, ur_device_handle_t *phDevices, uint32_t *pNumDevices) { @@ -95,26 +36,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, Type = CL_DEVICE_TYPE_ACCELERATOR; break; case UR_DEVICE_TYPE_DEFAULT: - Type = UR_DEVICE_TYPE_DEFAULT; + Type = CL_DEVICE_TYPE_DEFAULT; break; default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } - - cl_int Result = clGetDeviceIDs(cl_adapter::cast(hPlatform), - Type, cl_adapter::cast(NumEntries), - cl_adapter::cast(phDevices), - cl_adapter::cast(pNumDevices)); - - // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices - if (Result == CL_DEVICE_NOT_FOUND) { - Result = CL_SUCCESS; + try { + uint32_t AllDevicesNum = hPlatform->Devices.size(); + uint32_t DeviceNumIter = 0; + for (uint32_t i = 0; i < AllDevicesNum; i++) { + cl_device_type DevTy = hPlatform->Devices[i]->Type; + if (DevTy == Type || Type == CL_DEVICE_TYPE_ALL) { + if (phDevices) { + phDevices[DeviceNumIter] = hPlatform->Devices[i].get(); + } + DeviceNumIter++; + } + } if (pNumDevices) { - *pNumDevices = 0; + *pNumDevices = DeviceNumIter; } - } - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } } static ur_device_fp_capability_flags_t @@ -344,10 +292,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * to UR */ switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: { - cl_device_type CLType; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_device_type), &CLType, nullptr)); + cl_device_type CLType = hDevice->Type; /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not * enough information in the OpenCL runtime to know exactly which type it @@ -367,25 +312,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_DEVICE_ID: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_pci_bus_info"}, - Supported)); + UR_RETURN_ON_FAILURE( + hDevice->checkDeviceExtensions({"cl_khr_pci_bus_info"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } cl_device_pci_bus_info_khr PciInfo = {}; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_PCI_BUS_INFO_KHR, - sizeof(PciInfo), &PciInfo, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, + CL_DEVICE_PCI_BUS_INFO_KHR, + sizeof(PciInfo), &PciInfo, nullptr)); return ReturnValue(PciInfo.pci_device); } case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { oclv::OpenCLVersion Version; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), Version)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(Version)); const std::string Results = std::to_string(Version.getMajor()) + "." + std::to_string(Version.getMinor()); @@ -394,14 +337,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, - nullptr, &CLSize)); + clGetDeviceInfo(hDevice->CLDevice, CLPropName, 0, nullptr, &CLSize)); const size_t NProperties = CLSize / sizeof(cl_device_partition_property); std::vector CLValue(NProperties); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - CLSize, CLValue.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, CLSize, + CLValue.data(), nullptr)); /* The OpenCL implementation returns a value of 0 if no properties are * supported. UR will return a size of 0 for now. @@ -423,8 +364,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, - nullptr, &CLSize)); + clGetDeviceInfo(hDevice->CLDevice, CLPropName, 0, nullptr, &CLSize)); const size_t NProperties = CLSize / sizeof(cl_device_partition_property); /* The OpenCL implementation returns either a size of 0 or a value of 0 if @@ -438,9 +378,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, auto CLValue = reinterpret_cast(alloca(CLSize)); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - CLSize, CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, CLSize, + CLValue, nullptr)); std::vector URValue(NProperties - 1); @@ -493,14 +432,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* Corresponding OpenCL query is only available starting with OpenCL 2.1 * and we have to emulate it on older OpenCL runtimes. */ oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); if (DevVer >= oclv::V2_1) { cl_uint CLValue; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, - sizeof(cl_uint), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, + CL_DEVICE_MAX_NUM_SUB_GROUPS, + sizeof(cl_uint), &CLValue, nullptr)); if (CLValue == 0u) { /* OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 @@ -523,8 +461,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_device_fp_capability_flags_t */ if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { bool Supported; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_fp16"}, Supported)); + UR_RETURN_ON_FAILURE( + hDevice->checkDeviceExtensions({"cl_khr_fp16"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; @@ -532,9 +470,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } cl_device_fp_config CLValue; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_device_fp_config), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + sizeof(cl_device_fp_config), &CLValue, + nullptr)); return ReturnValue(mapCLDeviceFpConfigToUR(CLValue)); } @@ -543,8 +481,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* This query is missing before OpenCL 3.0. Check version and handle * appropriately */ oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); /* Minimum required capability to be returned. For OpenCL 1.2, this is all * that is required */ @@ -555,8 +492,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* For OpenCL >=3.0, the query should be implemented */ cl_device_atomic_capabilities CLCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + hDevice->CLDevice, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); /* Mask operation to only consider atomic_memory_order* capabilities */ @@ -603,14 +539,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + hDevice->CLDevice, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && @@ -657,14 +591,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + hDevice->CLDevice, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && @@ -707,8 +639,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); auto convertCapabilities = [](cl_device_atomic_capabilities CLCapabilities) { @@ -732,8 +663,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (DevVer >= oclv::V3_0) { cl_device_atomic_capabilities CLCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + hDevice->CLDevice, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); @@ -752,7 +682,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // not return an error if the query is unsuccessful as this is expected // of an OpenCL 1.2 driver. cl_device_atomic_capabilities CLCapabilities; - if (CL_SUCCESS == clGetDeviceInfo(cl_adapter::cast(hDevice), + if (CL_SUCCESS == clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)) { @@ -775,8 +705,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_ATOMIC_64: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, Supported)); @@ -785,16 +714,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DevType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, + nullptr)); return ReturnValue(DevType == CL_DEVICE_TYPE_GPU); } case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_mem_channel_property"}, Supported)); return ReturnValue(Supported); @@ -802,14 +730,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ESIMD_SUPPORT: { bool Supported = false; cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DevType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, + nullptr)); cl_uint VendorID = 0; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_VENDOR_ID, - sizeof(VendorID), &VendorID, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_VENDOR_ID, + sizeof(VendorID), &VendorID, nullptr)); /* ESIMD is only supported by Intel GPUs. */ Supported = DevType == CL_DEVICE_TYPE_GPU && VendorID == 0x8086; @@ -827,15 +754,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_program_scope_host_pipe"}, Supported)); return ReturnValue(Supported); } case UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_global_variable_access"}, Supported)); return ReturnValue(Supported); } @@ -850,9 +775,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_flags_t (uint32_t) */ cl_bitfield CLValue = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bitfield), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->CLDevice, CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); /* We can just static_cast the output because OpenCL and UR bitfields * map 1 to 1 for these properties. cl_bitfield is uint64_t and ur_flags_t @@ -867,14 +791,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_bitfield / enum * UR type: ur_flags_t (uint32_t) */ bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_unified_shared_memory"}, Supported)); if (Supported) { cl_bitfield CLValue = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bitfield), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + sizeof(cl_bitfield), &CLValue, + nullptr)); return ReturnValue(static_cast(CLValue)); } else { return ReturnValue(0); @@ -892,9 +815,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_bool_t */ cl_bool CLValue; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bool), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + sizeof(cl_bool), &CLValue, nullptr)); /* cl_bool is uint32_t and ur_bool_t is bool */ return ReturnValue(static_cast(CLValue)); @@ -904,15 +826,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_bool_t */ oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); /* Independent forward progress query is only supported as of OpenCL 2.1 * if version is older we return a default false. */ if (DevVer >= oclv::V2_1) { cl_bool CLValue; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bool), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + sizeof(cl_bool), &CLValue, nullptr)); /* cl_bool is uint32_t and ur_bool_t is bool */ return ReturnValue(static_cast(CLValue)); @@ -946,7 +866,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_SAMPLERS: case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: - case UR_DEVICE_INFO_REFERENCE_COUNT: case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: @@ -964,8 +883,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: - case UR_DEVICE_INFO_PLATFORM: - case UR_DEVICE_INFO_PARENT_DEVICE: case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_NAME: case UR_DEVICE_INFO_VENDOR: @@ -986,33 +903,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * | ur_device_handle_t | cl_device_id | 8 | */ - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - propSize, pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } case UR_DEVICE_INFO_IP_VERSION: { - bool Supported; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + bool Supported = false; + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_device_attribute_query"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - propSize, pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { bool isExtensionSupported; - if (cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), - {"cl_intel_required_subgroup_size"}, - isExtensionSupported) != UR_RESULT_SUCCESS || + if (hDevice->checkDeviceExtensions({"cl_intel_required_subgroup_size"}, + isExtensionSupported) != + UR_RESULT_SUCCESS || !isExtensionSupported) { std::vector aThreadIsItsOwnSubGroup({1}); return ReturnValue(aThreadIsItsOwnSubGroup.data(), @@ -1021,18 +934,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // Have to convert size_t to uint32_t size_t SubGroupSizesSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, - nullptr, &SubGroupSizesSize)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, 0, + nullptr, &SubGroupSizesSize)); std::vector SubGroupSizes(SubGroupSizesSize / sizeof(size_t)); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - SubGroupSizesSize, SubGroupSizes.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + SubGroupSizesSize, + SubGroupSizes.data(), nullptr)); return ReturnValue.template operator()(SubGroupSizes.data(), SubGroupSizes.size()); } case UR_DEVICE_INFO_EXTENSIONS: { - cl_device_id Dev = cl_adapter::cast(hDevice); + cl_device_id Dev = hDevice->CLDevice; size_t ExtSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); @@ -1051,23 +963,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_UUID: { // Use the cl_khr_device_uuid extension, if available. bool isKhrDeviceUuidSupported = false; - if (cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_device_uuid"}, - isKhrDeviceUuidSupported) != UR_RESULT_SUCCESS || + if (hDevice->checkDeviceExtensions({"cl_khr_device_uuid"}, + isKhrDeviceUuidSupported) != + UR_RESULT_SUCCESS || !isKhrDeviceUuidSupported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } static_assert(CL_UUID_SIZE_KHR == 16); std::array UUID{}; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), - CL_DEVICE_UUID_KHR, UUID.size(), UUID.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_UUID_KHR, + UUID.size(), UUID.data(), nullptr)); return ReturnValue(UUID); } case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: { return ReturnValue(false); } + case UR_DEVICE_INFO_REFERENCE_COUNT: { + return ReturnValue(hDevice->getReferenceCount()); + } + case UR_DEVICE_INFO_PLATFORM: { + return ReturnValue(hDevice->Platform); + } + case UR_DEVICE_INFO_PARENT_DEVICE: { + return ReturnValue(hDevice->ParentDevice); + } case UR_DEVICE_INFO_USM_POOL_SUPPORT: { return ReturnValue(false); @@ -1097,7 +1017,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { - cl_device_id Dev = cl_adapter::cast(hDevice); + cl_device_id Dev = hDevice->CLDevice; size_t ExtSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); @@ -1111,7 +1031,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, std::string::npos); } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { - cl_device_id Dev = cl_adapter::cast(hDevice); + cl_device_id Dev = hDevice->CLDevice; ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0; CL_RETURN_ON_FAILURE( getDeviceCommandBufferUpdateCapabilities(Dev, UpdateCapabilities)); @@ -1167,9 +1087,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CLProperties[CLProperties.size() - 1] = 0; cl_uint CLNumDevicesRet; - CL_RETURN_ON_FAILURE( - clCreateSubDevices(cl_adapter::cast(hDevice), - CLProperties.data(), 0, nullptr, &CLNumDevicesRet)); + CL_RETURN_ON_FAILURE(clCreateSubDevices( + hDevice->CLDevice, CLProperties.data(), 0, nullptr, &CLNumDevicesRet)); if (pNumDevicesRet) { *pNumDevicesRet = CLNumDevicesRet; @@ -1179,63 +1098,103 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( * function shall only retrieve that number of sub-devices. */ if (phSubDevices) { std::vector CLSubDevices(CLNumDevicesRet); - CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl_adapter::cast(hDevice), CLProperties.data(), - CLNumDevicesRet, CLSubDevices.data(), nullptr)); - - std::memcpy(phSubDevices, CLSubDevices.data(), - sizeof(cl_device_id) * NumDevices); + CL_RETURN_ON_FAILURE( + clCreateSubDevices(hDevice->CLDevice, CLProperties.data(), + CLNumDevicesRet, CLSubDevices.data(), nullptr)); + for (uint32_t i = 0; i < std::min(CLNumDevicesRet, NumDevices); i++) { + try { + auto URSubDevice = std::make_unique( + CLSubDevices[i], hDevice->Platform, hDevice); + phSubDevices[i] = URSubDevice.release(); + } catch (std::bad_alloc &) { + // Delete all the successfully created subdevices before the failed one. + for (uint32_t j = 0; j < i; j++) { + delete phSubDevices[j]; + } + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + // Delete all the successfully created subdevices before the failed one. + for (uint32_t j = 0; j < i; j++) { + delete phSubDevices[j]; + } + return UR_RESULT_ERROR_UNKNOWN; + } + } } return UR_RESULT_SUCCESS; } +// Root devices ref count are unchanged through out the program lifetime. UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { + if (hDevice->ParentDevice) { + hDevice->incrementReferenceCount(); + } - cl_int Result = clRetainDevice(cl_adapter::cast(hDevice)); - - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; } +// Root devices ref count are unchanged through out the program lifetime. UR_APIEXPORT ur_result_t UR_APICALL urDeviceRelease(ur_device_handle_t hDevice) { - - cl_int Result = clReleaseDevice(cl_adapter::cast(hDevice)); - - return mapCLErrorToUR(Result); + if (hDevice->ParentDevice) { + if (hDevice->decrementReferenceCount() == 0) { + delete hDevice; + } + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( ur_device_handle_t hDevice, ur_native_handle_t *phNativeDevice) { - *phNativeDevice = reinterpret_cast(hDevice); + *phNativeDevice = reinterpret_cast(hDevice->CLDevice); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( ur_native_handle_t hNativeDevice, ur_adapter_handle_t, - const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { - - *phDevice = reinterpret_cast(hNativeDevice); - return UR_RESULT_SUCCESS; + const ur_device_native_properties_t *pProperties, + ur_device_handle_t *phDevice) { + cl_device_id NativeHandle = reinterpret_cast(hNativeDevice); + + uint32_t NumPlatforms = 0; + UR_RETURN_ON_FAILURE(urPlatformGet(nullptr, 0, 0, nullptr, &NumPlatforms)); + std::vector Platforms(NumPlatforms); + UR_RETURN_ON_FAILURE( + urPlatformGet(nullptr, 0, NumPlatforms, Platforms.data(), nullptr)); + + for (uint32_t i = 0; i < NumPlatforms; i++) { + uint32_t NumDevices = 0; + UR_RETURN_ON_FAILURE( + urDeviceGet(Platforms[i], UR_DEVICE_TYPE_ALL, 0, nullptr, &NumDevices)); + std::vector Devices(NumDevices); + UR_RETURN_ON_FAILURE(urDeviceGet(Platforms[i], UR_DEVICE_TYPE_ALL, + NumDevices, Devices.data(), nullptr)); + + for (auto &Device : Devices) { + if (Device->CLDevice == NativeHandle) { + *phDevice = Device; + (*phDevice)->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; + return UR_RESULT_SUCCESS; + } + } + } + return UR_RESULT_ERROR_INVALID_DEVICE; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t *pHostTimestamp) { oclv::OpenCLVersion DevVer, PlatVer; - cl_platform_id Platform; - cl_device_id DeviceId = cl_adapter::cast(hDevice); + cl_device_id DeviceId = hDevice->CLDevice; // TODO: Cache OpenCL version for each device and platform - auto RetErr = clGetDeviceInfo(DeviceId, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &Platform, nullptr); - CL_RETURN_ON_FAILURE(RetErr); - - RetErr = cl_adapter::getDeviceVersion(DeviceId, DevVer); + auto RetErr = hDevice->getDeviceVersion(DevVer); CL_RETURN_ON_FAILURE(RetErr); - RetErr = cl_adapter::getPlatformVersion(Platform, PlatVer); + RetErr = hDevice->Platform->getPlatformVersion(PlatVer); if (PlatVer < oclv::V2_1 || DevVer < oclv::V2_1) { return UR_RESULT_ERROR_INVALID_OPERATION; @@ -1275,9 +1234,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( // Get the type of the device cl_device_type DeviceType; constexpr uint32_t InvalidInd = std::numeric_limits::max(); - cl_int RetErr = - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DeviceType, nullptr); + cl_int RetErr = clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_TYPE, + sizeof(cl_device_type), &DeviceType, nullptr); if (RetErr != CL_SUCCESS) { *pSelectedBinary = InvalidInd; CL_RETURN_ON_FAILURE(RetErr); diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 548a5012f9..afd8a8b96d 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -10,11 +10,95 @@ #pragma once #include "common.hpp" +#include "platform.hpp" -namespace cl_adapter { -ur_result_t getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version); +struct ur_device_handle_t_ { + using native_type = cl_device_id; + native_type CLDevice; + ur_platform_handle_t Platform; + cl_device_type Type = 0; + ur_device_handle_t ParentDevice = nullptr; + std::atomic RefCount = 0; + bool IsNativeHandleOwned = true; -ur_result_t checkDeviceExtensions(cl_device_id Dev, - const std::vector &Exts, - bool &Supported); -} // namespace cl_adapter + ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat, + ur_device_handle_t Parent) + : CLDevice(Dev), Platform(Plat), ParentDevice(Parent) { + RefCount = 1; + if (Parent) { + Type = Parent->Type; + } else { + clGetDeviceInfo(CLDevice, CL_DEVICE_TYPE, sizeof(cl_device_type), &Type, + nullptr); + } + } + + ~ur_device_handle_t_() { + if (ParentDevice && IsNativeHandleOwned) { + clReleaseDevice(CLDevice); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + ur_result_t getDeviceVersion(oclv::OpenCLVersion &Version) { + size_t DevVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(CLDevice, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); + + std::string DevVer(DevVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(CLDevice, CL_DEVICE_VERSION, + DevVerSize, DevVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(DevVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_DEVICE; + } + + return UR_RESULT_SUCCESS; + } + + bool isIntelFPGAEmuDevice() { + size_t NameSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(CLDevice, CL_DEVICE_NAME, 0, nullptr, &NameSize)); + std::string NameStr(NameSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(CLDevice, CL_DEVICE_NAME, NameSize, + NameStr.data(), nullptr)); + + return NameStr.find("Intel(R) FPGA Emulation Device") != std::string::npos; + } + + ur_result_t checkDeviceExtensions(const std::vector &Exts, + bool &Supported) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(CLDevice, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + + CL_RETURN_ON_FAILURE(clGetDeviceInfo(CLDevice, CL_DEVICE_EXTENSIONS, + ExtSize, ExtStr.data(), nullptr)); + + Supported = true; + for (const std::string &Ext : Exts) { + if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + // The Intel FPGA emulation device does actually support these, even if + // it doesn't report them. + if (isIntelFPGAEmuDevice() && + (Ext == "cl_intel_device_attribute_query" || + Ext == "cl_intel_required_subgroup_size")) { + Supported = true; + continue; + } + break; + } + } + + return UR_RESULT_SUCCESS; + } +}; diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 6596a01317..b6effaee6e 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -9,6 +9,29 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include "context.hpp" +#include "event.hpp" +#include "kernel.hpp" +#include "memory.hpp" +#include "program.hpp" +#include "queue.hpp" + +ur_result_t createUREvent(cl_event event, ur_context_handle_t context, + ur_queue_handle_t queue, + ur_event_handle_t *returnedEvent) { + if (returnedEvent) { + try { + auto UREvent = + std::make_unique(event, context, queue); + *returnedEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } + return UR_RESULT_SUCCESS; +} cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { cl_map_flags CLFlags = 0; @@ -25,6 +48,13 @@ cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { return CLFlags; } +void MapUREventsToCL(uint32_t numEvents, const ur_event_handle_t *UREvents, + std::vector &CLEvents) { + for (uint32_t i = 0; i < numEvents; i++) { + CLEvents[i] = UREvents[i]->CLEvent; + } +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, @@ -34,15 +64,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( if (!pLocalWorkSize) { cl_device_id device = nullptr; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_DEVICE, - sizeof(device), &device, nullptr)); + hQueue->CLQueue, CL_QUEUE_DEVICE, sizeof(device), &device, nullptr)); // This query always returns size_t[3], if nothing was specified it returns // all zeroes. size_t queriedLocalWorkSize[3] = {0, 0, 0}; CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - cl_adapter::cast(hKernel), device, - CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(size_t[3]), - queriedLocalWorkSize, nullptr)); + hKernel->CLKernel, device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, + sizeof(size_t[3]), queriedLocalWorkSize, nullptr)); if (queriedLocalWorkSize[0] != 0) { for (uint32_t i = 0; i < workDim; i++) { compiledLocalWorksize.push_back(queriedLocalWorkSize[i]); @@ -50,15 +78,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( } } + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( - cl_adapter::cast(hQueue), - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, + hQueue->CLQueue, hKernel->CLKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, compiledLocalWorksize.empty() ? pLocalWorkSize : compiledLocalWorksize.data(), - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -75,24 +105,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->CLQueue, numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->CLQueue, numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -100,13 +132,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->CLQueue, hBuffer->CLMemory, blockingRead, offset, size, pDst, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -114,13 +147,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->CLQueue, hBuffer->CLMemory, blockingWrite, offset, size, pSrc, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -135,15 +169,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( bufferOrigin.z}; const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingRead, BufferOrigin, HostOrigin, - Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, - pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->CLQueue, hBuffer->CLMemory, blockingRead, BufferOrigin, + HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, + hostSlicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -158,15 +192,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( bufferOrigin.z}; const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingWrite, BufferOrigin, + hQueue->CLQueue, hBuffer->CLMemory, blockingWrite, BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, - hostSlicePitch, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hostSlicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -175,14 +209,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBufferSrc), - cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->CLQueue, hBufferSrc->CLMemory, hBufferDst->CLMemory, srcOffset, + dstOffset, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -196,15 +230,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( const size_t SrcOrigin[3] = {srcOrigin.x, srcOrigin.y, srcOrigin.z}; const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBufferSrc), - cl_adapter::cast(hBufferDst), SrcOrigin, DstOrigin, Region, - srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->CLQueue, hBufferSrc->CLMemory, hBufferDst->CLMemory, SrcOrigin, + DstOrigin, Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -216,12 +250,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( // CL FillBuffer only allows pattern sizes up to the largest CL type: // long16/double16 if (patternSize <= 128) { - CL_RETURN_ON_FAILURE( - clEnqueueFillBuffer(cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, - patternSize, offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); + CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( + hQueue->CLQueue, hBuffer->CLMemory, pPattern, patternSize, offset, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + + UR_RETURN_ON_FAILURE( + createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -233,11 +270,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( } cl_event WriteEvent = nullptr; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); auto ClErr = clEnqueueWriteBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), false, offset, size, HostBuffer, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - &WriteEvent); + hQueue->CLQueue, hBuffer->CLMemory, false, offset, size, HostBuffer, + numEventsInWaitList, CLWaitEvents.data(), &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; CL_RETURN_ON_FAILURE(ClErr); @@ -258,7 +295,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( } if (phEvent) { - *phEvent = cl_adapter::cast(WriteEvent); + UR_RETURN_ON_FAILURE( + createUREvent(WriteEvent, hQueue->Context, hQueue, phEvent)); } else { CL_RETURN_ON_FAILURE(clReleaseEvent(WriteEvent)); } @@ -273,14 +311,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueReadImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImage), blockingRead, Origin, Region, rowPitch, - slicePitch, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->CLQueue, hImage->CLMemory, blockingRead, Origin, Region, rowPitch, + slicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -291,14 +329,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - - CL_RETURN_ON_FAILURE(clEnqueueWriteImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImage), blockingWrite, Origin, Region, rowPitch, - slicePitch, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); + CL_RETURN_ON_FAILURE( + clEnqueueWriteImage(hQueue->CLQueue, hImage->CLMemory, blockingWrite, + Origin, Region, rowPitch, slicePitch, pSrc, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -311,14 +349,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( const size_t SrcOrigin[3] = {srcOrigin.x, srcOrigin.y, srcOrigin.z}; const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueCopyImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), - SrcOrigin, DstOrigin, Region, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->CLQueue, hImageSrc->CLMemory, hImageDst->CLMemory, SrcOrigin, + DstOrigin, Region, numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -327,15 +364,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, void **ppRetMap) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); cl_int Err; - *ppRetMap = clEnqueueMapBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingMap, - convertURMapFlagsToCL(mapFlags), offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent), &Err); - + *ppRetMap = clEnqueueMapBuffer(hQueue->CLQueue, hBuffer->CLMemory, + blockingMap, convertURMapFlagsToCL(mapFlags), + offset, size, numEventsInWaitList, + CLWaitEvents.data(), &Event, &Err); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return mapCLErrorToUR(Err); } @@ -343,13 +380,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - - CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject( - cl_adapter::cast(hQueue), - cl_adapter::cast(hMem), pMappedPtr, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); + CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject(hQueue->CLQueue, hMem->CLMemory, + pMappedPtr, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; } @@ -359,25 +396,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return mapCLErrorToUR(Res); - + cl_context Ctx = hQueue->Context->CLContext; + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, cl_ext::EnqueueWriteGlobalVariableName, &F)); - Res = F(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), name, blockingWrite, count, - offset, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); - + cl_int Res = + F(hQueue->CLQueue, hProgram->CLProgram, name, blockingWrite, count, + offset, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return mapCLErrorToUR(Res); } @@ -387,25 +418,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return mapCLErrorToUR(Res); - + cl_context Ctx = hQueue->Context->CLContext; + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, cl_ext::EnqueueReadGlobalVariableName, &F)); - Res = F(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), name, blockingRead, count, - offset, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_int Res = + F(hQueue->CLQueue, hProgram->CLProgram, name, blockingRead, count, offset, + pDst, numEventsInWaitList, CLWaitEvents.data(), &Event); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return mapCLErrorToUR(Res); } @@ -415,14 +441,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } - + cl_context CLContext = hQueue->Context->CLContext; + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -431,11 +453,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( if (FuncPtr) { CL_RETURN_ON_FAILURE( - FuncPtr(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), pipe_symbol, blocking, - pDst, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + FuncPtr(hQueue->CLQueue, hProgram->CLProgram, pipe_symbol, blocking, + pDst, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); + + UR_RETURN_ON_FAILURE( + createUREvent(Event, hQueue->Context, hQueue, phEvent)); } return UR_RESULT_SUCCESS; @@ -447,14 +469,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } - + cl_context CLContext = hQueue->Context->CLContext; + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -463,11 +481,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( if (FuncPtr) { CL_RETURN_ON_FAILURE( - FuncPtr(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), pipe_symbol, blocking, - pSrc, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + FuncPtr(hQueue->CLQueue, hProgram->CLProgram, pipe_symbol, blocking, + pSrc, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); + UR_RETURN_ON_FAILURE( + createUREvent(Event, hQueue->Context, hQueue, phEvent)); } return UR_RESULT_SUCCESS; diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 45550a68e8..5fef1803bc 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "event.hpp" #include "common.hpp" #include @@ -110,39 +111,49 @@ ur_command_t convertCLCommandTypeToUR(const cl_command_type &CommandType) { } } -UR_APIEXPORT ur_result_t UR_APICALL -urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, - [[maybe_unused]] ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { - *phEvent = reinterpret_cast(hNativeEvent); - if (!pProperties || !pProperties->isNativeHandleOwned) { - return urEventRetain(*phEvent); +UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( + ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { + cl_event NativeHandle = reinterpret_cast(hNativeEvent); + try { + auto UREvent = + std::make_unique(NativeHandle, hContext, nullptr); + UREvent->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { - return getNativeHandle(hEvent, phNativeEvent); + return getNativeHandle(hEvent->CLEvent, phNativeEvent); } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { - cl_int RetErr = clReleaseEvent(cl_adapter::cast(hEvent)); - CL_RETURN_ON_FAILURE(RetErr); + if (hEvent->decrementReferenceCount() == 0) { + delete hEvent; + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - cl_int RetErr = clRetainEvent(cl_adapter::cast(hEvent)); - CL_RETURN_ON_FAILURE(RetErr); + hEvent->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { - cl_int RetErr = clWaitForEvents( - numEvents, cl_adapter::cast(phEventWaitList)); + std::vector CLEvents(numEvents); + for (uint32_t i = 0; i < numEvents; i++) { + CLEvents[i] = phEventWaitList[i]->CLEvent; + } + cl_int RetErr = clWaitForEvents(numEvents, CLEvents.data()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -153,32 +164,47 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, void *pPropValue, size_t *pPropSizeRet) { cl_event_info CLEventInfo = convertUREventInfoToCL(propName); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - size_t CheckPropSize = 0; - cl_int RetErr = - clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, - pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + switch (propName) { + case UR_EVENT_INFO_CONTEXT: { + return ReturnValue(hEvent->Context); } - CL_RETURN_ON_FAILURE(RetErr); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + case UR_EVENT_INFO_COMMAND_QUEUE: { + return ReturnValue(hEvent->Queue); } + case UR_EVENT_INFO_REFERENCE_COUNT: { + return ReturnValue(hEvent->getReferenceCount()); + } + default: { + size_t CheckPropSize = 0; + cl_int RetErr = clGetEventInfo(hEvent->CLEvent, CLEventInfo, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(RetErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } - if (pPropValue) { - if (propName == UR_EVENT_INFO_COMMAND_TYPE) { - *reinterpret_cast(pPropValue) = convertCLCommandTypeToUR( - *reinterpret_cast(pPropValue)); - } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - const auto param_value_int = static_cast(pPropValue); - if (*param_value_int < 0) { - // This can contain a negative return code to signify that the command - // terminated in an unexpected way. - *param_value_int = UR_EVENT_STATUS_ERROR; + if (pPropValue) { + if (propName == UR_EVENT_INFO_COMMAND_TYPE) { + *reinterpret_cast(pPropValue) = + convertCLCommandTypeToUR( + *reinterpret_cast(pPropValue)); + } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + const auto param_value_int = + static_cast(pPropValue); + if (*param_value_int < 0) { + // This can contain a negative return code to signify that the command + // terminated in an unexpected way. + *param_value_int = UR_EVENT_STATUS_ERROR; + } } } } + } return UR_RESULT_SUCCESS; } @@ -187,9 +213,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { cl_profiling_info CLProfilingInfo = convertURProfilingInfoToCL(propName); - cl_int RetErr = clGetEventProfilingInfo(cl_adapter::cast(hEvent), - CLProfilingInfo, propSize, pPropValue, - pPropSizeRet); + cl_int RetErr = clGetEventProfilingInfo(hEvent->CLEvent, CLProfilingInfo, + propSize, pPropValue, pPropSizeRet); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -254,8 +279,8 @@ urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, auto *C = static_cast(pUserData); C->execute(); }; - CL_RETURN_ON_FAILURE(clSetEventCallback(cl_adapter::cast(hEvent), - CallbackType, ClCallback, Callback)); + CL_RETURN_ON_FAILURE( + clSetEventCallback(hEvent->CLEvent, CallbackType, ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/event.hpp b/source/adapters/opencl/event.hpp new file mode 100644 index 0000000000..a323685818 --- /dev/null +++ b/source/adapters/opencl/event.hpp @@ -0,0 +1,49 @@ +//===--------- queue.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_event_handle_t_ { + using native_type = cl_event; + native_type CLEvent; + ur_context_handle_t Context; + ur_queue_handle_t Queue; + std::atomic RefCount = 0; + bool IsNativeHandleOwned = true; + + ur_event_handle_t_(native_type Event, ur_context_handle_t Ctx, + ur_queue_handle_t Queue) + : CLEvent(Event), Context(Ctx), Queue(Queue) { + RefCount = 1; + urContextRetain(Context); + if (Queue) { + urQueueRetain(Queue); + } + } + + ~ur_event_handle_t_() { + urContextRelease(Context); + if (Queue) { + urQueueRelease(Queue); + } + if (IsNativeHandleOwned) { + clReleaseEvent(CLEvent); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } +}; diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 617b6a9b2c..c56d356c1e 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -7,20 +7,75 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "kernel.hpp" #include "common.hpp" +#include "device.hpp" +#include "memory.hpp" +#include "program.hpp" +#include "queue.hpp" +#include "sampler.hpp" #include #include #include +ur_result_t ur_kernel_handle_t_::makeWithNative(native_type NativeKernel, + ur_program_handle_t Program, + ur_context_handle_t Context, + ur_kernel_handle_t &Kernel) { + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + cl_program CLProgram; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, + sizeof(CLProgram), &CLProgram, + nullptr)); + + if (Context->CLContext != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + if (Program) { + if (Program->CLProgram != CLProgram) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + } else { + ur_native_handle_t hNativeHandle = + reinterpret_cast(CLProgram); + UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( + hNativeHandle, Context, nullptr, &Program)); + } + + auto URKernel = + std::make_unique(NativeKernel, Program, Context); + Kernel = URKernel.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { + try { + cl_int CLResult; + cl_kernel Kernel = + clCreateKernel(hProgram->CLProgram, pKernelName, &CLResult); + CL_RETURN_ON_FAILURE(CLResult); + auto URKernel = std::make_unique(Kernel, hProgram, + hProgram->Context); + *phKernel = URKernel.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } - cl_int CLResult; - *phKernel = cl_adapter::cast(clCreateKernel( - cl_adapter::cast(hProgram), pKernelName, &CLResult)); - CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; } @@ -28,9 +83,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_value_properties_t *, const void *pArgValue) { - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - argSize, pArgValue)); + CL_RETURN_ON_FAILURE(clSetKernelArg( + hKernel->CLKernel, static_cast(argIndex), argSize, pArgValue)); return UR_RESULT_SUCCESS; } @@ -39,9 +93,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_local_properties_t *) { - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - argSize, nullptr)); + CL_RETURN_ON_FAILURE(clSetKernelArg( + hKernel->CLKernel, static_cast(argIndex), argSize, nullptr)); return UR_RESULT_SUCCESS; } @@ -73,20 +126,36 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { // OpenCL doesn't have a way to support this. - if (propName == UR_KERNEL_INFO_NUM_REGS) { + case UR_KERNEL_INFO_NUM_REGS: { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - size_t CheckPropSize = 0; - cl_int ClResult = clGetKernelInfo(cl_adapter::cast(hKernel), - mapURKernelInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + case UR_KERNEL_INFO_PROGRAM: { + return ReturnValue(hKernel->Program); + } + case UR_KERNEL_INFO_CONTEXT: { + return ReturnValue(hKernel->Context); + } + case UR_KERNEL_INFO_REFERENCE_COUNT: { + return ReturnValue(hKernel->getReferenceCount()); + } + default: { + size_t CheckPropSize = 0; + cl_int ClResult = + clGetKernelInfo(hKernel->CLKernel, mapURKernelInfoToCL(propName), + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; } return UR_RESULT_SUCCESS; @@ -123,9 +192,9 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, // to deter naive use of the query. if (propName == UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) { cl_device_type ClDeviceType; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(ClDeviceType), &ClDeviceType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_TYPE, + sizeof(ClDeviceType), &ClDeviceType, + nullptr)); if (ClDeviceType != CL_DEVICE_TYPE_CUSTOM) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } @@ -135,9 +204,8 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); + hKernel->CLKernel, hDevice->CLDevice, mapURKernelGroupInfoToCL(propName), + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -189,8 +257,7 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, InputValueSize = MaxDims * sizeof(size_t); } - cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), + cl_int Ret = clGetKernelSubGroupInfo(hKernel->CLKernel, hDevice->CLDevice, mapURKernelSubGroupInfoToCL(propName), InputValueSize, InputValue.get(), sizeof(size_t), &RetVal, pPropSizeRet); @@ -237,13 +304,15 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clRetainKernel(cl_adapter::cast(hKernel))); + hKernel->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clReleaseKernel(cl_adapter::cast(hKernel))); + if (hKernel->decrementReferenceCount() == 0) { + delete hKernel; + } return UR_RESULT_SUCCESS; } @@ -261,19 +330,18 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { /* We test that each alloc type is supported before we actually try to set * KernelExecInfo. */ - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - CL_KERNEL_CONTEXT, sizeof(cl_context), - &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->CLKernel, CL_KERNEL_CONTEXT, + sizeof(cl_context), &CLContext, + nullptr)); UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &HFunc)); if (HFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( @@ -281,10 +349,9 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_ext::DeviceMemAllocName, &DFunc)); if (DFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( @@ -292,10 +359,9 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_ext::SharedMemAllocName, &SFunc)); if (SFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } return UR_RESULT_SUCCESS; } @@ -317,9 +383,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_USM_PTRS: { - CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, propSize, pPropValue)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo(hKernel->CLKernel, + CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, + propSize, pPropValue)); return UR_RESULT_SUCCESS; } default: { @@ -333,9 +399,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - CL_KERNEL_CONTEXT, sizeof(cl_context), - &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->CLKernel, CL_KERNEL_CONTEXT, + sizeof(cl_context), &CLContext, + nullptr)); clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( @@ -345,9 +411,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( cl_ext::SetKernelArgMemPointerName, &FuncPtr)); if (FuncPtr) { - CL_RETURN_ON_FAILURE(FuncPtr(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - pArgValue)); + CL_RETURN_ON_FAILURE( + FuncPtr(hKernel->CLKernel, static_cast(argIndex), pArgValue)); } return UR_RESULT_SUCCESS; @@ -355,7 +420,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { - *phNativeKernel = reinterpret_cast(hKernel); + *phNativeKernel = reinterpret_cast(hKernel->CLKernel); return UR_RESULT_SUCCESS; } @@ -368,13 +433,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( } UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, - const ur_kernel_native_properties_t *pProperties, + ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, + ur_program_handle_t hProgram, + [[maybe_unused]] const ur_kernel_native_properties_t *pProperties, ur_kernel_handle_t *phKernel) { - *phKernel = reinterpret_cast(hNativeKernel); - if (!pProperties || !pProperties->isNativeHandleOwned) { - return urKernelRetain(*phKernel); - } + cl_kernel NativeHandle = reinterpret_cast(hNativeKernel); + + UR_RETURN_ON_FAILURE(ur_kernel_handle_t_::makeWithNative( + NativeHandle, hProgram, hContext, *phKernel)); + + (*phKernel)->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; return UR_RESULT_SUCCESS; } @@ -382,10 +451,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { - cl_int RetErr = clSetKernelArg( - cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), - sizeof(hArgValue), cl_adapter::cast(&hArgValue)); - CL_RETURN_ON_FAILURE(RetErr); + cl_mem CLArgValue = hArgValue ? hArgValue->CLMemory : nullptr; + CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->CLKernel, + static_cast(argIndex), + sizeof(CLArgValue), &CLArgValue)); return UR_RESULT_SUCCESS; } @@ -393,9 +462,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { - cl_int RetErr = clSetKernelArg( - cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), - sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + cl_sampler CLArgSampler = hArgValue->CLSampler; + cl_int RetErr = + clSetKernelArg(hKernel->CLKernel, static_cast(argIndex), + sizeof(CLArgSampler), &CLArgSampler); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -407,9 +477,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( cl_device_id Device; cl_platform_id Platform; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_DEVICE, - sizeof(cl_device_id), &Device, nullptr)); + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(hQueue->CLQueue, CL_QUEUE_DEVICE, + sizeof(cl_device_id), &Device, + nullptr)); CL_RETURN_ON_FAILURE(clGetDeviceInfo( Device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &Platform, nullptr)); @@ -422,8 +492,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; CL_RETURN_ON_FAILURE(GetKernelSuggestedLocalWorkSizeFuncPtr( - cl_adapter::cast(hQueue), - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, + hQueue->CLQueue, hKernel->CLKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pSuggestedLocalWorkSize)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp new file mode 100644 index 0000000000..a1cb5c317e --- /dev/null +++ b/source/adapters/opencl/kernel.hpp @@ -0,0 +1,56 @@ +//===--------- kernel.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" +#include "program.hpp" + +#include + +struct ur_kernel_handle_t_ { + using native_type = cl_kernel; + native_type CLKernel; + ur_program_handle_t Program; + ur_context_handle_t Context; + std::atomic RefCount = 0; + bool IsNativeHandleOwned = true; + + ur_kernel_handle_t_(native_type Kernel, ur_program_handle_t Program, + ur_context_handle_t Context) + : CLKernel(Kernel), Program(Program), Context(Context) { + RefCount = 1; + if (Program) { + urProgramRetain(Program); + } + urContextRetain(Context); + } + + ~ur_kernel_handle_t_() { + if (Program) { + urProgramRelease(Program); + } + urContextRelease(Context); + if (IsNativeHandleOwned) { + clReleaseKernel(CLKernel); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + static ur_result_t makeWithNative(native_type NativeKernel, + ur_program_handle_t Program, + ur_context_handle_t Context, + ur_kernel_handle_t &Kernel); +}; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 201df1f678..8a0d656379 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -8,7 +8,9 @@ // //===----------------------------------------------------------------------===// +#include "memory.hpp" #include "common.hpp" +#include "context.hpp" cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_format CLImageFormat; @@ -119,8 +121,7 @@ cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_desc mapURImageDescToCL(const ur_image_desc_t *PImageDesc) { cl_image_desc CLImageDesc; - CLImageDesc.image_type = - cl_adapter::cast(PImageDesc->type); + CLImageDesc.image_type = static_cast(PImageDesc->type); switch (PImageDesc->type) { case UR_MEM_TYPE_IMAGE2D: @@ -217,16 +218,40 @@ cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { return CLFlags; } +ur_result_t ur_mem_handle_t_::makeWithNative(native_type NativeMem, + ur_context_handle_t Ctx, + ur_mem_handle_t &Mem) { + if (!Ctx) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo( + NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + + if (Ctx->CLContext != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + auto URMem = std::make_unique(NativeMem, Ctx); + Mem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { - cl_int RetErr = CL_INVALID_OPERATION; if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->CLContext; // First we need to look up the function pointer RetErr = cl_ext::getExtFuncFromContext( @@ -257,18 +282,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( } PropertiesIntel.push_back(0); - *phBuffer = reinterpret_cast(FuncPtr( - CLContext, PropertiesIntel.data(), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&RetErr))); + try { + cl_mem Buffer = FuncPtr( + CLContext, PropertiesIntel.data(), static_cast(flags), + size, pProperties->pHost, static_cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hContext); + *phBuffer = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return mapCLErrorToUR(RetErr); } } void *HostPtr = pProperties ? pProperties->pHost : nullptr; - *phBuffer = reinterpret_cast(clCreateBuffer( - cl_adapter::cast(hContext), static_cast(flags), - size, HostPtr, cl_adapter::cast(&RetErr))); - CL_RETURN_ON_FAILURE(RetErr); + try { + cl_mem Buffer = + clCreateBuffer(hContext->CLContext, static_cast(flags), + size, HostPtr, static_cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hContext); + *phBuffer = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -284,10 +326,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( cl_image_desc ImageDesc = mapURImageDescToCL(pImageDesc); cl_map_flags MapFlags = convertURMemFlagsToCL(flags); - *phMem = reinterpret_cast(clCreateImage( - cl_adapter::cast(hContext), MapFlags, &ImageFormat, - &ImageDesc, pHost, cl_adapter::cast(&RetErr))); - CL_RETURN_ON_FAILURE(RetErr); + try { + cl_mem Mem = + clCreateImage(hContext->CLContext, MapFlags, &ImageFormat, &ImageDesc, + pHost, static_cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Mem, hContext); + *phMem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -311,48 +361,55 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( _cl_buffer_region BufferRegion; BufferRegion.origin = pRegion->origin; BufferRegion.size = pRegion->size; - - *phMem = reinterpret_cast(clCreateSubBuffer( - cl_adapter::cast(hBuffer), static_cast(flags), - BufferCreateType, &BufferRegion, cl_adapter::cast(&RetErr))); - - if (RetErr == CL_INVALID_VALUE) { - size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hBuffer), - CL_MEM_SIZE, sizeof(BufferSize), - &BufferSize, nullptr)); - if (BufferRegion.size + BufferRegion.origin > BufferSize) - return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + try { + cl_mem Buffer = clCreateSubBuffer( + hBuffer->CLMemory, static_cast(flags), BufferCreateType, + &BufferRegion, static_cast(&RetErr)); + if (RetErr == CL_INVALID_VALUE) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(hBuffer->CLMemory, CL_MEM_SIZE, + sizeof(BufferSize), &BufferSize, + nullptr)); + if (BufferRegion.size + BufferRegion.origin > BufferSize) + return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + } + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hBuffer->Context); + *phMem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } return mapCLErrorToUR(RetErr); } UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( ur_mem_handle_t hMem, ur_device_handle_t, ur_native_handle_t *phNativeMem) { - return getNativeHandle(hMem, phNativeMem); + return getNativeHandle(hMem->CLMemory, phNativeMem); } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( - ur_native_handle_t hNativeMem, - [[maybe_unused]] ur_context_handle_t hContext, + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - *phMem = reinterpret_cast(hNativeMem); - if (!pProperties || !pProperties->isNativeHandleOwned) { - return urMemRetain(*phMem); - } + cl_mem NativeHandle = reinterpret_cast(hNativeMem); + UR_RETURN_ON_FAILURE( + ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); + (*phMem)->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( - ur_native_handle_t hNativeMem, - [[maybe_unused]] ur_context_handle_t hContext, + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, [[maybe_unused]] const ur_image_format_t *pImageFormat, [[maybe_unused]] const ur_image_desc_t *pImageDesc, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - *phMem = reinterpret_cast(hNativeMem); - if (!pProperties || !pProperties->isNativeHandleOwned) { - return urMemRetain(*phMem); - } + cl_mem NativeHandle = reinterpret_cast(hNativeMem); + UR_RETURN_ON_FAILURE( + ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); + (*phMem)->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; return UR_RESULT_SUCCESS; } @@ -365,17 +422,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemInfoToCL(propName); - size_t CheckPropSize = 0; - auto ClResult = - clGetMemObjectInfo(cl_adapter::cast(hMemory), CLPropName, - propSize, pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + switch (static_cast(propName)) { + case UR_MEM_INFO_CONTEXT: { + return ReturnValue(hMemory->Context); } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + case UR_MEM_INFO_REFERENCE_COUNT: { + return ReturnValue(hMemory->getReferenceCount()); + } + default: { + size_t CheckPropSize = 0; + auto ClResult = clGetMemObjectInfo(hMemory->CLMemory, CLPropName, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } } + } + return UR_RESULT_SUCCESS; } @@ -389,8 +456,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, const cl_int CLPropName = mapURMemImageInfoToCL(propName); size_t CheckPropSize = 0; - auto ClResult = clGetImageInfo(cl_adapter::cast(hMemory), CLPropName, - propSize, pPropValue, &CheckPropSize); + auto ClResult = clGetImageInfo(hMemory->CLMemory, CLPropName, propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -402,11 +469,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, } UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { - CL_RETURN_ON_FAILURE(clRetainMemObject(cl_adapter::cast(hMem))); + hMem->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { - CL_RETURN_ON_FAILURE(clReleaseMemObject(cl_adapter::cast(hMem))); + if (hMem->decrementReferenceCount() == 0) { + delete hMem; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp new file mode 100644 index 0000000000..1aa1b16d4e --- /dev/null +++ b/source/adapters/opencl/memory.hpp @@ -0,0 +1,46 @@ +//===--------- memory.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" + +#include + +struct ur_mem_handle_t_ { + using native_type = cl_mem; + native_type CLMemory; + ur_context_handle_t Context; + std::atomic RefCount = 0; + bool IsNativeHandleOwned = true; + + ur_mem_handle_t_(native_type Mem, ur_context_handle_t Ctx) + : CLMemory(Mem), Context(Ctx) { + RefCount = 1; + urContextRetain(Context); + } + + ~ur_mem_handle_t_() { + urContextRelease(Context); + if (IsNativeHandleOwned) { + clReleaseMemObject(CLMemory); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + static ur_result_t makeWithNative(native_type NativeMem, + ur_context_handle_t Ctx, + ur_mem_handle_t &Mem); +}; diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 218a5e7f00..341830b1c2 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -9,25 +9,7 @@ //===----------------------------------------------------------------------===// #include "platform.hpp" - -ur_result_t cl_adapter::getPlatformVersion(cl_platform_id Plat, - oclv::OpenCLVersion &Version) { - - size_t PlatVerSize = 0; - CL_RETURN_ON_FAILURE( - clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); - - std::string PlatVer(PlatVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, PlatVerSize, - PlatVer.data(), nullptr)); - - Version = oclv::OpenCLVersion(PlatVer); - if (!Version.isValid()) { - return UR_RESULT_ERROR_INVALID_PLATFORM; - } - - return UR_RESULT_SUCCESS; -} +#include "adapter.hpp" static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { @@ -62,9 +44,11 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, case UR_PLATFORM_INFO_VERSION: case UR_PLATFORM_INFO_EXTENSIONS: case UR_PLATFORM_INFO_PROFILE: { + cl_platform_id Plat = hPlatform->CLPlatform; + CL_RETURN_ON_FAILURE( - clGetPlatformInfo(cl_adapter::cast(hPlatform), - CLPropName, propSize, pPropValue, pSizeRet)); + clGetPlatformInfo(Plat, CLPropName, propSize, pPropValue, pSizeRet)); + return UR_RESULT_SUCCESS; } default: @@ -83,38 +67,83 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - cl_int Result = - clGetPlatformIDs(cl_adapter::cast(NumEntries), - cl_adapter::cast(phPlatforms), - cl_adapter::cast(pNumPlatforms)); - - /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ - if (Result == CL_PLATFORM_NOT_FOUND_KHR) { - Result = CL_SUCCESS; - if (pNumPlatforms) { - *pNumPlatforms = 0; + ur_adapter_handle_t Adapter = nullptr; + UR_RETURN_ON_FAILURE(urAdapterGet(1, &Adapter, nullptr)); + if (Adapter && !(Adapter->NumPlatforms)) { + uint32_t NumPlatforms = 0; + cl_int Res = clGetPlatformIDs(0, nullptr, &NumPlatforms); + + std::vector CLPlatforms(NumPlatforms); + Res = clGetPlatformIDs(static_cast(NumPlatforms), + CLPlatforms.data(), nullptr); + + /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms + */ + if (Res == CL_PLATFORM_NOT_FOUND_KHR) { + if (pNumPlatforms) { + *pNumPlatforms = 0; + return UR_RESULT_SUCCESS; + } + } + /* INVALID_VALUE is returned when the size is invalid, special case it here + */ + if (Res == CL_INVALID_VALUE && phPlatforms != nullptr && NumEntries == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(Res); + try { + for (uint32_t i = 0; i < NumPlatforms; i++) { + auto URPlatform = + std::make_unique(CLPlatforms[i]); + UR_RETURN_ON_FAILURE(URPlatform->InitDevices()); + Adapter->URPlatforms.emplace_back(URPlatform.release()); + } + Adapter->NumPlatforms = NumPlatforms; + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_INVALID_PLATFORM; } } - /* INVALID_VALUE is returned when the size is invalid, special case it here */ - if (Result == CL_INVALID_VALUE && phPlatforms != nullptr && NumEntries == 0) { - return UR_RESULT_ERROR_INVALID_SIZE; + if (pNumPlatforms != nullptr) { + *pNumPlatforms = Adapter->NumPlatforms; + } + if (NumEntries && phPlatforms) { + for (uint32_t i = 0; i < NumEntries; i++) { + phPlatforms[i] = Adapter->URPlatforms[i].get(); + } } - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { - *phNativePlatform = reinterpret_cast(hPlatform); + *phNativePlatform = + reinterpret_cast(hPlatform->CLPlatform); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( ur_native_handle_t hNativePlatform, ur_adapter_handle_t, const ur_platform_native_properties_t *, ur_platform_handle_t *phPlatform) { - *phPlatform = reinterpret_cast(hNativePlatform); - return UR_RESULT_SUCCESS; + cl_platform_id NativeHandle = + reinterpret_cast(hNativePlatform); + + uint32_t NumPlatforms = 0; + UR_RETURN_ON_FAILURE(urPlatformGet(nullptr, 0, 0, nullptr, &NumPlatforms)); + std::vector Platforms(NumPlatforms); + UR_RETURN_ON_FAILURE( + urPlatformGet(nullptr, 0, NumPlatforms, Platforms.data(), nullptr)); + + for (uint32_t i = 0; i < NumPlatforms; i++) { + if (Platforms[i]->CLPlatform == NativeHandle) { + *phPlatform = Platforms[i]; + return UR_RESULT_SUCCESS; + } + } + return UR_RESULT_ERROR_INVALID_PLATFORM; } // Returns plugin specific backend option. diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 31fd69de14..85699ded95 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -10,8 +10,79 @@ #pragma once #include "common.hpp" +#include "device.hpp" -namespace cl_adapter { -ur_result_t getPlatformVersion(cl_platform_id Plat, - oclv::OpenCLVersion &Version); -} +#include + +struct ur_platform_handle_t_ { + using native_type = cl_platform_id; + native_type CLPlatform = nullptr; + std::vector> Devices; + + ur_platform_handle_t_(native_type Plat) : CLPlatform(Plat) {} + + ~ur_platform_handle_t_() { + for (auto &Dev : Devices) { + Dev.reset(); + } + Devices.clear(); + } + + template + ur_result_t getExtFunc(T CachedExtFunc, const char *FuncName, T *Fptr) { + if (!CachedExtFunc) { + // TODO: check that the function is available + CachedExtFunc = reinterpret_cast( + clGetExtensionFunctionAddressForPlatform(CLPlatform, FuncName)); + if (!CachedExtFunc) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } + *Fptr = CachedExtFunc; + return UR_RESULT_SUCCESS; + } + + ur_result_t InitDevices() { + if (Devices.empty()) { + cl_uint DeviceNum = 0; + CL_RETURN_ON_FAILURE(clGetDeviceIDs(CLPlatform, CL_DEVICE_TYPE_ALL, 0, + nullptr, &DeviceNum)); + + std::vector CLDevices(DeviceNum); + CL_RETURN_ON_FAILURE(clGetDeviceIDs(CLPlatform, CL_DEVICE_TYPE_ALL, + DeviceNum, CLDevices.data(), + nullptr)); + + try { + Devices.resize(DeviceNum); + for (size_t i = 0; i < DeviceNum; i++) { + Devices[i] = std::make_unique(CLDevices[i], this, + nullptr); + } + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } + + return UR_RESULT_SUCCESS; + } + + ur_result_t getPlatformVersion(oclv::OpenCLVersion &Version) { + size_t PlatVerSize = 0; + CL_RETURN_ON_FAILURE(clGetPlatformInfo(CLPlatform, CL_PLATFORM_VERSION, 0, + nullptr, &PlatVerSize)); + + std::string PlatVer(PlatVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetPlatformInfo( + CLPlatform, CL_PLATFORM_VERSION, PlatVerSize, PlatVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(PlatVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_PLATFORM; + } + + return UR_RESULT_SUCCESS; + } +}; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 20aaa8fd3a..6c9b51e1a0 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -8,30 +8,37 @@ // //===----------------------------------------------------------------------===// +#include "program.hpp" #include "adapter.hpp" #include "common.hpp" #include "context.hpp" #include "device.hpp" #include "platform.hpp" -static ur_result_t getDevicesFromProgram( - ur_program_handle_t hProgram, - std::unique_ptr> &DevicesInProgram) { +#include - cl_uint DeviceCount; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), - &DeviceCount, nullptr)); - - if (DeviceCount < 1) { - return UR_RESULT_ERROR_INVALID_CONTEXT; +ur_result_t ur_program_handle_t_::makeWithNative(native_type NativeProg, + ur_context_handle_t Context, + ur_program_handle_t &Program) { + if (!Context) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + if (Context->CLContext != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + auto URProgram = std::make_unique( + NativeProg, Context, Context->DeviceCount, Context->Devices.data()); + Program = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } - - DevicesInProgram = std::make_unique>(DeviceCount); - - CL_RETURN_ON_FAILURE(clGetProgramInfo( - cl_adapter::cast(hProgram), CL_PROGRAM_DEVICES, - DeviceCount * sizeof(cl_device_id), (*DevicesInProgram).data(), nullptr)); return UR_RESULT_SUCCESS; } @@ -40,29 +47,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( ur_context_handle_t hContext, const void *pIL, size_t length, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - std::unique_ptr> DevicesInCtx; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getDevicesFromContext(hContext, DevicesInCtx), phProgram); - - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, nullptr), - phProgram); + ur_platform_handle_t CurPlatform = hContext->Devices[0]->Platform; oclv::OpenCLVersion PlatVer; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getPlatformVersion(CurPlatform, PlatVer), phProgram); + CL_RETURN_ON_FAILURE_AND_SET_NULL(CurPlatform->getPlatformVersion(PlatVer), + phProgram); cl_int Err = CL_SUCCESS; + cl_program Program; if (PlatVer >= oclv::V2_1) { /* Make sure all devices support CL 2.1 or newer as well. */ - for (cl_device_id Dev : *DevicesInCtx) { + for (ur_device_handle_t URDev : hContext->Devices) { oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getDeviceVersion(Dev, DevVer), phProgram); + CL_RETURN_ON_FAILURE_AND_SET_NULL(URDev->getDeviceVersion(DevVer), + phProgram); /* If the device does not support CL 2.1 or greater, we need to make sure * it supports the cl_khr_il_program extension. @@ -70,8 +70,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( if (DevVer < oclv::V2_1) { bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, - Supported), + URDev->checkDeviceExtensions({"cl_khr_il_program"}, Supported), phProgram); if (!Supported) { @@ -80,18 +79,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } } - *phProgram = cl_adapter::cast(clCreateProgramWithIL( - cl_adapter::cast(hContext), pIL, length, &Err)); + Program = clCreateProgramWithIL(hContext->CLContext, pIL, length, &Err); } else { - /* If none of the devices conform with CL 2.1 or newer make sure they all * support the cl_khr_il_program extension. */ - for (cl_device_id Dev : *DevicesInCtx) { + for (ur_device_handle_t URDev : hContext->Devices) { bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, - Supported), + URDev->checkDeviceExtensions({"cl_khr_il_program"}, Supported), phProgram); if (!Supported) { @@ -103,12 +99,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); ApiFuncT FuncPtr = reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - CurPlatform, "clCreateProgramWithILKHR")); + CurPlatform->CLPlatform, "clCreateProgramWithILKHR")); assert(FuncPtr != nullptr); - *phProgram = cl_adapter::cast( - FuncPtr(cl_adapter::cast(hContext), pIL, length, &Err)); + Program = FuncPtr(hContext->CLContext, pIL, length, &Err); } // INVALID_VALUE is only returned in three circumstances according to the cl @@ -130,6 +125,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( CL_RETURN_ON_FAILURE(Err); } + try { + auto URProgram = std::make_unique( + Program, hContext, hContext->DeviceCount, hContext->Devices.data()); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; } @@ -137,15 +142,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, uint32_t numDevices, ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - std::vector Devices(numDevices); + std::vector CLDevices(numDevices); for (uint32_t i = 0; i < numDevices; ++i) - Devices[i] = cl_adapter::cast(phDevices[i]); + CLDevices[i] = phDevices[i]->CLDevice; std::vector BinaryStatus(numDevices); cl_int CLResult; - *phProgram = cl_adapter::cast(clCreateProgramWithBinary( - cl_adapter::cast(hContext), - cl_adapter::cast(numDevices), Devices.data(), pLengths, - ppBinaries, BinaryStatus.data(), &CLResult)); + cl_program Program = clCreateProgramWithBinary( + hContext->CLContext, static_cast(numDevices), CLDevices.data(), + pLengths, ppBinaries, BinaryStatus.data(), &CLResult); + CL_RETURN_ON_FAILURE(CLResult); + auto URProgram = std::make_unique( + Program, hContext, numDevices, phDevices); + *phProgram = URProgram.release(); for (uint32_t i = 0; i < numDevices; ++i) { CL_RETURN_ON_FAILURE(BinaryStatus[i]); } @@ -158,12 +166,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - std::unique_ptr> DevicesInProgram; - UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + uint32_t DeviceCount = hProgram->NumDevices; + std::vector CLDevicesInProgram(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + CLDevicesInProgram[i] = hProgram->Devices[i]->CLDevice; + } - CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), - DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, 0, + CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->CLProgram, DeviceCount, + CLDevicesInProgram.data(), pOptions, 0, nullptr, nullptr, nullptr, nullptr)); return UR_RESULT_SUCCESS; @@ -198,17 +208,38 @@ static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - size_t CheckPropSize = 0; - auto ClResult = clGetProgramInfo(cl_adapter::cast(hProgram), - mapURProgramInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + const cl_program_info CLPropName = mapURProgramInfoToCL(propName); + + switch (static_cast(propName)) { + case UR_PROGRAM_INFO_CONTEXT: { + return ReturnValue(hProgram->Context); } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + case UR_PROGRAM_INFO_NUM_DEVICES: { + cl_uint DeviceCount = hProgram->NumDevices; + return ReturnValue(DeviceCount); + } + case UR_PROGRAM_INFO_DEVICES: { + return ReturnValue(hProgram->Devices.data(), hProgram->NumDevices); + } + case UR_PROGRAM_INFO_REFERENCE_COUNT: { + return ReturnValue(hProgram->getReferenceCount()); } + default: { + size_t CheckPropSize = 0; + auto ClResult = clGetProgramInfo(hProgram->CLProgram, CLPropName, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + } + } + return UR_RESULT_SUCCESS; } @@ -216,12 +247,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - std::unique_ptr> DevicesInProgram; - UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + uint32_t DeviceCount = hProgram->NumDevices; + std::vector CLDevicesInProgram(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + CLDevicesInProgram[i] = hProgram->Devices[i]->CLDevice; + } - CL_RETURN_ON_FAILURE(clBuildProgram( - cl_adapter::cast(hProgram), DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, nullptr, nullptr)); + CL_RETURN_ON_FAILURE( + clBuildProgram(hProgram->CLProgram, CLDevicesInProgram.size(), + CLDevicesInProgram.data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -231,19 +265,29 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, ur_program_handle_t *phProgram) { cl_int CLResult; - *phProgram = cl_adapter::cast( - clLinkProgram(cl_adapter::cast(hContext), 0, nullptr, - pOptions, cl_adapter::cast(count), - cl_adapter::cast(phPrograms), nullptr, - nullptr, &CLResult)); + std::vector CLPrograms(count); + for (uint32_t i = 0; i < count; i++) { + CLPrograms[i] = phPrograms[i]->CLProgram; + } + cl_program Program = clLinkProgram( + hContext->CLContext, 0, nullptr, pOptions, static_cast(count), + CLPrograms.data(), nullptr, nullptr, &CLResult); if (CL_INVALID_BINARY == CLResult) { // Some OpenCL drivers incorrectly return CL_INVALID_BINARY here, convert it // to CL_LINK_PROGRAM_FAILURE CLResult = CL_LINK_PROGRAM_FAILURE; } - CL_RETURN_ON_FAILURE(CLResult); + try { + auto URProgram = std::make_unique( + Program, hContext, hContext->DeviceCount, hContext->Devices.data()); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -318,15 +362,13 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( - cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), + hProgram->CLProgram, hDevice->CLDevice, mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), &BinaryType, nullptr)); return ReturnValue(mapCLBinaryTypeToUR(BinaryType)); } size_t CheckPropSize = 0; - cl_int ClErr = clGetProgramBuildInfo(cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), + cl_int ClErr = clGetProgramBuildInfo(hProgram->CLProgram, hDevice->CLDevice, mapURProgramBuildInfoToCL(propName), propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { @@ -342,34 +384,35 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain(ur_program_handle_t hProgram) { - - CL_RETURN_ON_FAILURE(clRetainProgram(cl_adapter::cast(hProgram))); + hProgram->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { - - CL_RETURN_ON_FAILURE( - clReleaseProgram(cl_adapter::cast(hProgram))); + if (hProgram->decrementReferenceCount() == 0) { + delete hProgram; + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { - *phNativeProgram = reinterpret_cast(hProgram); + *phNativeProgram = reinterpret_cast(hProgram->CLProgram); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( - ur_native_handle_t hNativeProgram, ur_context_handle_t, + ur_native_handle_t hNativeProgram, ur_context_handle_t hContext, const ur_program_native_properties_t *pProperties, ur_program_handle_t *phProgram) { - *phProgram = reinterpret_cast(hNativeProgram); - if (!pProperties || !pProperties->isNativeHandleOwned) { - return urProgramRetain(*phProgram); - } + cl_program NativeHandle = reinterpret_cast(hNativeProgram); + + UR_RETURN_ON_FAILURE( + ur_program_handle_t_::makeWithNative(NativeHandle, hContext, *phProgram)); + (*phProgram)->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; return UR_RESULT_SUCCESS; } @@ -377,21 +420,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( ur_program_handle_t hProgram, uint32_t count, const ur_specialization_constant_info_t *pSpecConstants) { - cl_program CLProg = cl_adapter::cast(hProgram); - cl_context Ctx = nullptr; - size_t RetSize = 0; - - CL_RETURN_ON_FAILURE(clGetProgramInfo(CLProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), - &Ctx, &RetSize)); - - std::unique_ptr> DevicesInCtx; - UR_RETURN_ON_FAILURE(cl_adapter::getDevicesFromContext( - cl_adapter::cast(Ctx), DevicesInCtx)); - - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE(clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, - nullptr)); + cl_program CLProg = hProgram->CLProgram; + if (!hProgram->Context) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + ur_context_handle_t Ctx = hProgram->Context; + if (!Ctx->DeviceCount || !Ctx->Devices[0]->Platform) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } if (ur::cl::getAdapter()->clSetProgramSpecializationConstant) { for (uint32_t i = 0; i < count; ++i) { @@ -437,10 +473,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ur_device_handle_t hDevice, ur_program_handle_t hProgram, const char *pFunctionName, void **ppFunctionPointer) { - cl_context CLContext = nullptr; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_CONTEXT, sizeof(CLContext), - &CLContext, nullptr)); + cl_context CLContext = hProgram->Context->CLContext; cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; @@ -456,15 +489,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( // throws exceptions. *ppFunctionPointer = 0; size_t Size; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_KERNEL_NAMES, 0, nullptr, - &Size)); + CL_RETURN_ON_FAILURE(clGetProgramInfo( + hProgram->CLProgram, CL_PROGRAM_KERNEL_NAMES, 0, nullptr, &Size)); std::string KernelNames(Size, ' '); - CL_RETURN_ON_FAILURE(clGetProgramInfo( - cl_adapter::cast(hProgram), CL_PROGRAM_KERNEL_NAMES, - KernelNames.size(), &KernelNames[0], nullptr)); + CL_RETURN_ON_FAILURE( + clGetProgramInfo(hProgram->CLProgram, CL_PROGRAM_KERNEL_NAMES, + KernelNames.size(), &KernelNames[0], nullptr)); // Get rid of the null terminator and search for the kernel name. If the // function cannot be found, return an error code to indicate it exists. @@ -474,8 +506,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( } const cl_int CLResult = - FuncT(cl_adapter::cast(hDevice), - cl_adapter::cast(hProgram), pFunctionName, + FuncT(hDevice->CLDevice, hProgram->CLProgram, pFunctionName, reinterpret_cast(ppFunctionPointer)); // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address // cannot be found but the kernel exists. As the kernel does exist, return @@ -496,9 +527,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( void **ppGlobalVariablePointerRet) { cl_context CLContext = nullptr; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_CONTEXT, sizeof(CLContext), - &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetProgramInfo(hProgram->CLProgram, CL_PROGRAM_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); cl_ext::clGetDeviceGlobalVariablePointer_fn FuncT = nullptr; @@ -508,8 +539,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( cl_ext::GetDeviceGlobalVariablePointerName, &FuncT)); const cl_int CLResult = - FuncT(cl_adapter::cast(hDevice), - cl_adapter::cast(hProgram), pGlobalVariableName, + FuncT(hDevice->CLDevice, hProgram->CLProgram, pGlobalVariableName, pGlobalVariableSizeRet, ppGlobalVariablePointerRet); if (CLResult != CL_SUCCESS) { diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp new file mode 100644 index 0000000000..4bdbad5249 --- /dev/null +++ b/source/adapters/opencl/program.hpp @@ -0,0 +1,52 @@ +//===--------- program.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" + +#include + +struct ur_program_handle_t_ { + using native_type = cl_program; + native_type CLProgram; + ur_context_handle_t Context; + std::atomic RefCount = 0; + bool IsNativeHandleOwned = true; + uint32_t NumDevices = 0; + std::vector Devices; + + ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx, + uint32_t NumDevices, ur_device_handle_t *Devs) + : CLProgram(Prog), Context(Ctx), NumDevices(NumDevices) { + RefCount = 1; + urContextRetain(Context); + for (uint32_t i = 0; i < NumDevices; i++) { + Devices.push_back(Devs[i]); + } + } + + ~ur_program_handle_t_() { + urContextRelease(Context); + if (IsNativeHandleOwned) { + clReleaseProgram(CLProgram); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + static ur_result_t makeWithNative(native_type NativeProg, + ur_context_handle_t Context, + ur_program_handle_t &Program); +}; diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 2e40963ad1..80353ed7f5 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -6,7 +6,10 @@ // //===-----------------------------------------------------------------===// +#include "queue.hpp" #include "common.hpp" +#include "context.hpp" +#include "device.hpp" #include "platform.hpp" cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { @@ -67,16 +70,46 @@ mapCLQueuePropsToUR(const cl_command_queue_properties &Properties) { return Flags; } +ur_result_t ur_queue_handle_t_::makeWithNative(native_type NativeQueue, + ur_context_handle_t Context, + ur_device_handle_t Device, + ur_queue_handle_t &Queue) { + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + NativeQueue, CL_QUEUE_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + cl_device_id CLDevice; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + NativeQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); + if (Context->CLContext != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + if (Device) { + if (Device->CLDevice != CLDevice) { + return UR_RESULT_ERROR_INVALID_DEVICE; + } + } else { + ur_native_handle_t hNativeHandle = + reinterpret_cast(CLDevice); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( + hNativeHandle, nullptr, nullptr, &Device)); + } + auto URQueue = + std::make_unique(NativeQueue, Context, Device); + Queue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - clGetDeviceInfo(cl_adapter::cast(hDevice), - CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &CurPlatform, - nullptr), - phQueue); + ur_platform_handle_t CurPlatform = hDevice->Platform; cl_command_queue_properties CLProperties = pProperties ? convertURQueuePropertiesToCL(pProperties) : 0; @@ -87,29 +120,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; oclv::OpenCLVersion Version; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getPlatformVersion(CurPlatform, Version), phQueue); + CL_RETURN_ON_FAILURE_AND_SET_NULL(CurPlatform->getPlatformVersion(Version), + phQueue); cl_int RetErr = CL_INVALID_OPERATION; if (Version < oclv::V2_0) { - *phQueue = cl_adapter::cast( - clCreateCommandQueue(cl_adapter::cast(hContext), - cl_adapter::cast(hDevice), - CLProperties & SupportByOpenCL, &RetErr)); + cl_command_queue Queue = + clCreateCommandQueue(hContext->CLContext, hDevice->CLDevice, + CLProperties & SupportByOpenCL, &RetErr); CL_RETURN_ON_FAILURE(RetErr); + try { + auto URQueue = + std::make_unique(Queue, hContext, hDevice); + *phQueue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; } /* TODO: Add support for CL_QUEUE_PRIORITY_KHR */ cl_queue_properties CreationFlagProperties[] = { CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; - *phQueue = - cl_adapter::cast(clCreateCommandQueueWithProperties( - cl_adapter::cast(hContext), - cl_adapter::cast(hDevice), CreationFlagProperties, - &RetErr)); + cl_command_queue Queue = clCreateCommandQueueWithProperties( + hContext->CLContext, hDevice->CLDevice, CreationFlagProperties, &RetErr); CL_RETURN_ON_FAILURE(RetErr); + try { + auto URQueue = + std::make_unique(Queue, hContext, hDevice); + *phQueue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -123,23 +171,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); - + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_QUEUE_INFO_CONTEXT: { + return ReturnValue(hQueue->Context); + } + case UR_QUEUE_INFO_DEVICE: { + return ReturnValue(hQueue->Device); + } // Unfortunately the size of cl_bitfield (unsigned long) doesn't line up with // our enums (forced to be sizeof(uint32_t)) so this needs special handling. - if (propName == UR_QUEUE_INFO_FLAGS) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - + case UR_QUEUE_INFO_FLAGS: { cl_command_queue_properties QueueProperties = 0; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CLCommandQueueInfo, - sizeof(QueueProperties), &QueueProperties, nullptr)); + hQueue->CLQueue, CLCommandQueueInfo, sizeof(QueueProperties), + &QueueProperties, nullptr)); return ReturnValue(mapCLQueuePropsToUR(QueueProperties)); - } else { + } + case UR_QUEUE_INFO_REFERENCE_COUNT: { + return ReturnValue(hQueue->getReferenceCount()); + } + default: { size_t CheckPropSize = 0; - cl_int RetErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CLCommandQueueInfo, - propSize, pPropValue, &CheckPropSize); + cl_int RetErr = clGetCommandQueueInfo(hQueue->CLQueue, CLCommandQueueInfo, + propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -148,6 +204,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, *pPropSizeRet = CheckPropSize; } } + } return UR_RESULT_SUCCESS; } @@ -155,45 +212,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, ur_native_handle_t *phNativeQueue) { - return getNativeHandle(hQueue, phNativeQueue); + return getNativeHandle(hQueue->CLQueue, phNativeQueue); } UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( - ur_native_handle_t hNativeQueue, - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] const ur_queue_native_properties_t *pProperties, + ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, + ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { - *phQueue = reinterpret_cast(hNativeQueue); - cl_int RetErr = - clRetainCommandQueue(cl_adapter::cast(*phQueue)); - CL_RETURN_ON_FAILURE(RetErr); + cl_command_queue NativeHandle = + reinterpret_cast(hNativeQueue); + + UR_RETURN_ON_FAILURE(ur_queue_handle_t_::makeWithNative( + NativeHandle, hContext, hDevice, *phQueue)); + + (*phQueue)->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + cl_int RetErr = clFinish(hQueue->CLQueue); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { - cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + cl_int RetErr = clFinish(hQueue->CLQueue); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - cl_int RetErr = - clRetainCommandQueue(cl_adapter::cast(hQueue)); - CL_RETURN_ON_FAILURE(RetErr); + hQueue->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - cl_int RetErr = - clReleaseCommandQueue(cl_adapter::cast(hQueue)); - CL_RETURN_ON_FAILURE(RetErr); + if (hQueue->decrementReferenceCount() == 0) { + delete hQueue; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp new file mode 100644 index 0000000000..7b2924bfdd --- /dev/null +++ b/source/adapters/opencl/queue.hpp @@ -0,0 +1,56 @@ +//===--------- queue.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" +#include "device.hpp" + +#include + +struct ur_queue_handle_t_ { + using native_type = cl_command_queue; + native_type CLQueue; + ur_context_handle_t Context; + ur_device_handle_t Device; + std::atomic RefCount = 0; + bool IsNativeHandleOwned = true; + + ur_queue_handle_t_(native_type Queue, ur_context_handle_t Ctx, + ur_device_handle_t Dev) + : CLQueue(Queue), Context(Ctx), Device(Dev) { + RefCount = 1; + if (Device) { + urDeviceRetain(Device); + } + urContextRetain(Context); + } + + static ur_result_t makeWithNative(native_type NativeQueue, + ur_context_handle_t Context, + ur_device_handle_t Device, + ur_queue_handle_t &Queue); + + ~ur_queue_handle_t_() { + if (Device) { + urDeviceRelease(Device); + } + urContextRelease(Context); + if (IsNativeHandleOwned) { + clReleaseCommandQueue(CLQueue); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } +}; diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index a47ba7f894..1cd2532000 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -8,7 +8,9 @@ // //===----------------------------------------------------------------------===// +#include "sampler.hpp" #include "common.hpp" +#include "context.hpp" namespace { @@ -137,16 +139,23 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, ur_sampler_handle_t *phSampler) { // Initialize properties according to OpenCL 2.1 spec. - ur_result_t ErrorCode; + cl_int ErrorCode; cl_addressing_mode AddressingMode = ur2CLAddressingMode(pDesc->addressingMode); cl_filter_mode FilterMode = ur2CLFilterMode(pDesc->filterMode); - - // Always call OpenCL 1.0 API - *phSampler = cl_adapter::cast(clCreateSampler( - cl_adapter::cast(hContext), - static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, - cl_adapter::cast(&ErrorCode))); + try { + // Always call OpenCL 1.0 API + cl_sampler Sampler = clCreateSampler( + hContext->CLContext, static_cast(pDesc->normalizedCoords), + AddressingMode, FilterMode, &ErrorCode); + CL_RETURN_ON_FAILURE(ErrorCode); + auto URSampler = std::make_unique(Sampler, hContext); + *phSampler = URSampler.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return mapCLErrorToUR(ErrorCode); } @@ -157,16 +166,25 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, cl_sampler_info SamplerInfo = ur2CLSamplerInfo(propName); static_assert(sizeof(cl_addressing_mode) == sizeof(ur_sampler_addressing_mode_t)); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); ur_result_t Err = UR_RESULT_SUCCESS; + + switch (propName) { + case UR_SAMPLER_INFO_CONTEXT: { + return ReturnValue(hSampler->Context); + } + case UR_SAMPLER_INFO_REFERENCE_COUNT: { + return ReturnValue(hSampler->getReferenceCount()); + } // ur_bool_t have a size of uint8_t, but cl_bool size have the size of // uint32_t so this adjust UR_SAMPLER_INFO_NORMALIZED_COORDS info to map // between them. - if (propName == UR_SAMPLER_INFO_NORMALIZED_COORDS) { + case UR_SAMPLER_INFO_NORMALIZED_COORDS: { cl_bool normalized_coords = false; - Err = mapCLErrorToUR( - clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, - sizeof(cl_bool), &normalized_coords, nullptr)); + Err = mapCLErrorToUR(clGetSamplerInfo(hSampler->CLSampler, SamplerInfo, + sizeof(cl_bool), &normalized_coords, + nullptr)); if (pPropValue && propSize != sizeof(ur_bool_t)) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -178,52 +196,65 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, if (pPropSizeRet) { *pPropSizeRet = sizeof(ur_bool_t); } - } else { + break; + } + default: { size_t CheckPropSize = 0; - Err = mapCLErrorToUR( - clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, - propSize, pPropValue, &CheckPropSize)); + ur_result_t Err = + mapCLErrorToUR(clGetSamplerInfo(hSampler->CLSampler, SamplerInfo, + propSize, pPropValue, &CheckPropSize)); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } - UR_RETURN_ON_FAILURE(Err); + CL_RETURN_ON_FAILURE(Err); if (pPropSizeRet) { *pPropSizeRet = CheckPropSize; } - } - // Convert OpenCL returns to UR - cl2URSamplerInfoValue(SamplerInfo, pPropValue); + // Convert OpenCL returns to UR + cl2URSamplerInfoValue(SamplerInfo, pPropValue); + } + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { - return mapCLErrorToUR( - clRetainSampler(cl_adapter::cast(hSampler))); + hSampler->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { - return mapCLErrorToUR( - clReleaseSampler(cl_adapter::cast(hSampler))); + if (hSampler->decrementReferenceCount() == 0) { + delete hSampler; + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { - *phNativeSampler = reinterpret_cast( - cl_adapter::cast(hSampler)); + *phNativeSampler = reinterpret_cast(hSampler->CLSampler); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( - ur_native_handle_t hNativeSampler, ur_context_handle_t, + ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { - *phSampler = reinterpret_cast(hNativeSampler); - if (!pProperties || !pProperties->isNativeHandleOwned) { - return urSamplerRetain(*phSampler); + cl_sampler NativeHandle = reinterpret_cast(hNativeSampler); + try { + auto URSampler = + std::make_unique(NativeHandle, hContext); + URSampler->IsNativeHandleOwned = + pProperties ? pProperties->isNativeHandleOwned : false; + *phSampler = URSampler.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.hpp b/source/adapters/opencl/sampler.hpp new file mode 100644 index 0000000000..68839dbd7e --- /dev/null +++ b/source/adapters/opencl/sampler.hpp @@ -0,0 +1,41 @@ +//===--------- sampler.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_sampler_handle_t_ { + using native_type = cl_sampler; + native_type CLSampler; + ur_context_handle_t Context; + std::atomic RefCount = 0; + bool IsNativeHandleOwned = false; + + ur_sampler_handle_t_(native_type Sampler, ur_context_handle_t Ctx) + : CLSampler(Sampler), Context(Ctx) { + RefCount = 1; + urContextRetain(Context); + } + + ~ur_sampler_handle_t_() { + urContextRelease(Context); + if (IsNativeHandleOwned) { + clReleaseSampler(CLSampler); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } +}; diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index dfcc1dfafa..297b84f6a9 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -11,6 +11,10 @@ #include #include "common.hpp" +#include "context.hpp" +#include "device.hpp" +#include "event.hpp" +#include "queue.hpp" #include "usm.hpp" template @@ -96,7 +100,7 @@ usmDescToCLMemProperties(const ur_base_desc_t *Desc, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, +urUSMHostAlloc(ur_context_handle_t Context, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { void *Ptr = nullptr; @@ -115,7 +119,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, // First we need to look up the function pointer clHostMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &FuncPtr)) { @@ -143,7 +147,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, +urUSMDeviceAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -163,7 +167,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clDeviceMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, cl_ext::DeviceMemAllocName, &FuncPtr)) { @@ -172,7 +176,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), + Ptr = FuncPtr(CLContext, hDevice->CLDevice, AllocProperties.empty() ? nullptr : AllocProperties.data(), size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { @@ -191,7 +195,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, +urUSMSharedAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -211,7 +215,7 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clSharedMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, cl_ext::SharedMemAllocName, &FuncPtr)) { @@ -220,9 +224,9 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), + Ptr = FuncPtr(CLContext, hDevice->CLDevice, AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, cl_adapter::cast(&ClResult)); + size, Alignment, static_cast(&ClResult)); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -237,14 +241,14 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, +UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t Context, void *pMem) { // Use a blocking free to avoid issues with indirect access from kernels that // might be still running. clMemBlockingFreeINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = Context->CLContext; ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, @@ -262,13 +266,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } + cl_context CLContext = hQueue->Context->CLContext; if (patternSize <= 128 && isPowerOf2(patternSize)) { clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; @@ -276,12 +274,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, cl_ext::EnqueueMemFillName, &EnqueueMemFill)); - - CL_RETURN_ON_FAILURE( - EnqueueMemFill(cl_adapter::cast(hQueue), ptr, - pPattern, patternSize, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; + } + CL_RETURN_ON_FAILURE(EnqueueMemFill(hQueue->CLQueue, ptr, pPattern, + patternSize, size, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -315,16 +326,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( } cl_event CopyEvent = nullptr; - CL_RETURN_ON_FAILURE(USMMemcpy( - cl_adapter::cast(hQueue), false, ptr, HostBuffer, size, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - &CopyEvent)); + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; + } + CL_RETURN_ON_FAILURE(USMMemcpy(hQueue->CLQueue, false, ptr, HostBuffer, size, + numEventsInWaitList, CLWaitEvents.data(), + &CopyEvent)); if (phEvent) { // Since we're releasing this in the callback above we need to retain it // here to keep the user copy alive. CL_RETURN_ON_FAILURE(clRetainEvent(CopyEvent)); - *phEvent = cl_adapter::cast(CopyEvent); + try { + auto UREvent = std::make_unique( + CopyEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } // This self destructs taking the event and allocation with it. @@ -351,14 +373,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } + cl_context CLContext = hQueue->Context->CLContext; + cl_int CLErr = CL_SUCCESS; clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, @@ -389,19 +406,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( // We need a queue associated with each device, so first figure out which // one we weren't given. cl_device_id QueueDevice = nullptr; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_DEVICE, - sizeof(QueueDevice), &QueueDevice, nullptr)); + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(hQueue->CLQueue, CL_QUEUE_DEVICE, + sizeof(QueueDevice), + &QueueDevice, nullptr)); cl_command_queue MissingQueue = nullptr, SrcQueue = nullptr, DstQueue = nullptr; if (QueueDevice == SrcDevice) { MissingQueue = clCreateCommandQueue(CLContext, DstDevice, 0, &CLErr); - SrcQueue = cl_adapter::cast(hQueue); + SrcQueue = hQueue->CLQueue; DstQueue = MissingQueue; } else { MissingQueue = clCreateCommandQueue(CLContext, SrcDevice, 0, &CLErr); - DstQueue = cl_adapter::cast(hQueue); + DstQueue = hQueue->CLQueue; SrcQueue = MissingQueue; } CL_RETURN_ON_FAILURE(CLErr); @@ -431,9 +448,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( return UR_RESULT_SUCCESS; }; - UR_RETURN_ON_FAILURE(checkCLErr(USMMemcpy( - SrcQueue, blocking, HostAlloc, pSrc, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), &HostCopyEvent))); + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; + } + UR_RETURN_ON_FAILURE(checkCLErr( + USMMemcpy(SrcQueue, blocking, HostAlloc, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), &HostCopyEvent))); UR_RETURN_ON_FAILURE( checkCLErr(USMMemcpy(DstQueue, blocking, pDst, HostAlloc, size, 1, @@ -446,13 +467,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( CL_RETURN_ON_FAILURE(clReleaseEvent(HostCopyEvent)); CL_RETURN_ON_FAILURE(clReleaseCommandQueue(MissingQueue)); if (phEvent) { - *phEvent = cl_adapter::cast(FinalCopyEvent); + try { + auto UREvent = std::make_unique( + FinalCopyEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } else { CL_RETURN_ON_FAILURE(clReleaseEvent(FinalCopyEvent)); } } else { if (phEvent) { - *phEvent = cl_adapter::cast(FinalCopyEvent); + try { + auto UREvent = std::make_unique( + FinalCopyEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } // We are going to release this event in our callback so we need to // retain if the user wants a copy. CL_RETURN_ON_FAILURE(clRetainEvent(FinalCopyEvent)); @@ -476,11 +513,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( } } } else { - CL_RETURN_ON_FAILURE( - USMMemcpy(cl_adapter::cast(hQueue), blocking, pDst, - pSrc, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; + } + CL_RETURN_ON_FAILURE(USMMemcpy(hQueue->CLQueue, blocking, pDst, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), + &Event)); + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } } return UR_RESULT_SUCCESS; @@ -492,23 +543,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( [[maybe_unused]] ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - - return mapCLErrorToUR(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; + } + CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( + hQueue->CLQueue, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } + return UR_RESULT_SUCCESS; /* // Use this once impls support it. // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(cl_context), - &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); - } + cl_context CLContext = hQueue->Context; clEnqueueMigrateMemINTEL_fn FuncPtr; ur_result_t Err = cl_ext::getExtFuncFromContext( @@ -519,7 +576,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( RetVal = Err; } else { RetVal = map_cl_error_to_ur( - FuncPtr(cl_adapter::cast(hQueue), pMem, size, flags, + FuncPtr(hQueue->CLQueue, pMem, size, flags, numEventsInWaitList, reinterpret_cast(phEventWaitList), reinterpret_cast(phEvent))); @@ -531,23 +588,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, [[maybe_unused]] size_t size, [[maybe_unused]] ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { - - return mapCLErrorToUR(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), 0, nullptr, - reinterpret_cast(phEvent))); - + cl_event Event; + CL_RETURN_ON_FAILURE( + clEnqueueMarkerWithWaitList(hQueue->CLQueue, 0, nullptr, &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } + return UR_RESULT_SUCCESS; /* // Change to use this once drivers support it. // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, - sizeof(cl_context), - &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); - } + cl_context CLContext = hQueue->Context; clEnqueueMemAdviseINTEL_fn FuncPtr; ur_result_t Err = @@ -559,7 +618,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( RetVal = Err; } else { RetVal = - map_cl_error_to_ur(FuncPtr(cl_adapter::cast(hQueue), pMem, + map_cl_error_to_ur(FuncPtr(hQueue->CLQueue, pMem, size, advice, 0, nullptr, reinterpret_cast(phEvent))); } */ @@ -581,10 +640,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const void *pSrc, size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr)); + cl_context CLContext = hQueue->Context->CLContext; clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( @@ -598,12 +654,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( std::vector Events(height); for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { cl_event Event = nullptr; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; + } auto ClResult = - FuncPtr(cl_adapter::cast(hQueue), false, + FuncPtr(hQueue->CLQueue, false, static_cast(pDst) + dstPitch * HeightIndex, static_cast(pSrc) + srcPitch * HeightIndex, - width, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), &Event); + width, numEventsInWaitList, CLWaitEvents.data(), &Event); Events[HeightIndex] = Event; if (ClResult != CL_SUCCESS) { for (const auto &E : Events) { @@ -617,9 +676,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ClResult = clWaitForEvents(Events.size(), Events.data()); } if (phEvent && ClResult == CL_SUCCESS) { - ClResult = clEnqueueBarrierWithWaitList( - cl_adapter::cast(hQueue), Events.size(), - Events.data(), cl_adapter::cast(phEvent)); + cl_event Event; + ClResult = clEnqueueBarrierWithWaitList(hQueue->CLQueue, Events.size(), + Events.data(), &Event); + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } } for (const auto &E : Events) { CL_RETURN_ON_FAILURE(clReleaseEvent(E)); @@ -643,13 +713,12 @@ mapCLUSMTypeToUR(const cl_unified_shared_memory_type_intel &Type) { } } -UR_APIEXPORT ur_result_t UR_APICALL -urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, - ur_usm_alloc_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( + ur_context_handle_t Context, const void *pMem, ur_usm_alloc_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = Context->CLContext; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); @@ -671,11 +740,13 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, default: return UR_RESULT_ERROR_INVALID_VALUE; } - + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + if (propName == UR_USM_ALLOC_INFO_DEVICE) { + return ReturnValue(Context->Devices[0]); + } size_t CheckPropSize = 0; - cl_int ClErr = - GetMemAllocInfo(cl_adapter::cast(hContext), pMem, PropNameCL, - propSize, pPropValue, &CheckPropSize); + cl_int ClErr = GetMemAllocInfo(Context->CLContext, pMem, PropNameCL, propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; }