diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 8ae1e77755..fbbdd84e59 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -22,9 +22,7 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { std::lock_guard Lock{adapter.Mutex}; - if (adapter.RefCount++ == 0) { - cl_ext::ExtFuncPtrCache = std::make_unique(); - } + adapter.RefCount++; *phAdapters = &adapter; } @@ -43,9 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { std::lock_guard Lock{adapter.Mutex}; - if (--adapter.RefCount == 0) { - cl_ext::ExtFuncPtrCache.reset(); - } + --adapter.RefCount; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 88c661b4ae..815dfc9c06 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -10,6 +10,12 @@ #include "command_buffer.hpp" #include "common.hpp" +#include "context.hpp" +#include "event.hpp" +#include "kernel.hpp" +#include "memory.hpp" +#include "platform.hpp" +#include "queue.hpp" UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, @@ -19,26 +25,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_queue_handle_t Queue = nullptr; UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue)); - cl_context CLContext = cl_adapter::cast(hContext); - cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache, - cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR); + ur_platform_handle_t Platform = hDevice->Platform; + cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = + Platform->ExtFuncPtr->clCreateCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCreateCommandBufferKHR, + cl_ext::CreateCommandBufferName, + "cl_khr_command_buffer")); - if (!clCreateCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; - - auto CLCommandBuffer = clCreateCommandBufferKHR( - 1, cl_adapter::cast(&Queue), nullptr, &Res); + cl_int Res = 0; + cl_command_queue CLQueue = Queue->get(); + auto CLCommandBuffer = clCreateCommandBufferKHR(1, &CLQueue, nullptr, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); try { auto URCommandBuffer = std::make_unique( Queue, hContext, CLCommandBuffer); *phCommandBuffer = URCommandBuffer.release(); - } catch (...) { + } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } CL_RETURN_ON_FAILURE(Res); @@ -49,14 +55,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue)); - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, - cl_ext::RetainCommandBufferName, &clRetainCommandBuffer); - - if (!clRetainCommandBuffer || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = + Platform->ExtFuncPtr->clRetainCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clRetainCommandBuffer, + cl_ext::RetainCommandBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clRetainCommandBuffer(hCommandBuffer->CLCommandBuffer)); return UR_RESULT_SUCCESS; @@ -66,15 +70,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue)); - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, - cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); - - if (!clReleaseCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = + Platform->ExtFuncPtr->clReleaseCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clReleaseCommandBufferKHR, + cl_ext::ReleaseCommandBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE( clReleaseCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); @@ -83,15 +84,12 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache, - cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR); - - if (!clFinalizeCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = + Platform->ExtFuncPtr->clFinalizeCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clFinalizeCommandBufferKHR, + cl_ext::FinalizeCommandBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE( clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); @@ -107,21 +105,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_exp_command_buffer_command_handle_t *) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache, - cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR); - - if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = + Platform->ExtFuncPtr->clCommandNDRangeKernelKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandNDRangeKernelKHR, + cl_ext::CommandNRRangeKernelName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hKernel->get(), + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } @@ -155,18 +149,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, - cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR); - - if (!clCommandCopyBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = + Platform->ExtFuncPtr->clCommandCopyBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandCopyBufferKHR, + cl_ext::CommandCopyBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, - cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(), srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); @@ -191,19 +182,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; size_t OpenCLRegion[3]{region.width, region.height, region.depth}; - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache, - cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR); - - if (!clCommandCopyBufferRectKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = + Platform->ExtFuncPtr->clCommandCopyBufferRectKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandCopyBufferRectKHR, + cl_ext::CommandCopyBufferRectName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( - hCommandBuffer->CLCommandBuffer, nullptr, - cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(), OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); @@ -281,19 +268,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, - cl_ext::CommandFillBufferName, &clCommandFillBufferKHR); - - if (!clCommandFillBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = + Platform->ExtFuncPtr->clCommandFillBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandFillBufferKHR, + cl_ext::CommandFillBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, - cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, hBuffer->get(), pPattern, + patternSize, offset, size, numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } @@ -337,24 +322,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache, - cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR); - - if (!clEnqueueCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = + Platform->ExtFuncPtr->clEnqueueCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clEnqueueCommandBufferKHR, + cl_ext::EnqueueCommandBufferName, + "cl_khr_command_buffer")); const uint32_t NumberOfQueues = 1; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + cl_command_queue CLQueue = hQueue->get(); CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR( - NumberOfQueues, cl_adapter::cast(&hQueue), - hCommandBuffer->CLCommandBuffer, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -380,15 +375,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetCommandBufferInfoKHRCache, - cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR); - - if (!clGetCommandBufferInfoKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = + Platform->ExtFuncPtr->clGetCommandBufferInfoKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clGetCommandBufferInfoKHR, + cl_ext::GetCommandBufferInfoName, + "cl_khr_command_buffer")); if (propName != UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT) { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index d80f29594b..17e4e1f7b6 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -11,6 +11,8 @@ #include #include +#include "context.hpp" + struct ur_exp_command_buffer_handle_t_ { ur_queue_handle_t hInternalQueue; ur_context_handle_t hContext; @@ -21,4 +23,6 @@ struct ur_exp_command_buffer_handle_t_ { cl_command_buffer_khr CLCommandBuffer) : hInternalQueue(hQueue), hContext(hContext), CLCommandBuffer(CLCommandBuffer) {} + + ur_platform_handle_t getPlatform() { return hContext->Devices[0]->Platform; } }; diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0667cd3d17..2fd00afd36 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -305,110 +305,6 @@ using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); -template struct FuncPtrCache { - std::map Map; - std::mutex Mutex; -}; - -// FIXME: There's currently no mechanism for cleaning up this cache, meaning -// that it is invalidated whenever a context is destroyed. This could lead to -// reusing an invalid function pointer if another context happens to have the -// same native handle. -struct ExtFuncPtrCacheT { - FuncPtrCache clHostMemAllocINTELCache; - FuncPtrCache clDeviceMemAllocINTELCache; - FuncPtrCache clSharedMemAllocINTELCache; - FuncPtrCache clGetDeviceFunctionPointerCache; - FuncPtrCache - clCreateBufferWithPropertiesINTELCache; - FuncPtrCache clMemBlockingFreeINTELCache; - FuncPtrCache - clSetKernelArgMemPointerINTELCache; - FuncPtrCache clEnqueueMemFillINTELCache; - FuncPtrCache clEnqueueMemcpyINTELCache; - FuncPtrCache clGetMemAllocInfoINTELCache; - FuncPtrCache - clEnqueueWriteGlobalVariableCache; - FuncPtrCache clEnqueueReadGlobalVariableCache; - FuncPtrCache clEnqueueReadHostPipeINTELCache; - FuncPtrCache clEnqueueWriteHostPipeINTELCache; - FuncPtrCache - clSetProgramSpecializationConstantCache; - FuncPtrCache clCreateCommandBufferKHRCache; - FuncPtrCache clRetainCommandBufferKHRCache; - FuncPtrCache clReleaseCommandBufferKHRCache; - FuncPtrCache clFinalizeCommandBufferKHRCache; - FuncPtrCache clCommandNDRangeKernelKHRCache; - FuncPtrCache clCommandCopyBufferKHRCache; - FuncPtrCache clCommandCopyBufferRectKHRCache; - FuncPtrCache clCommandFillBufferKHRCache; - FuncPtrCache clEnqueueCommandBufferKHRCache; - FuncPtrCache clGetCommandBufferInfoKHRCache; -}; -// A raw pointer is used here since the lifetime of this map has to be tied to -// piTeardown to avoid issues with static destruction order (a user application -// might have static objects that indirectly access this cache in their -// destructor). -inline std::unique_ptr ExtFuncPtrCache; - -// USM helper function to get an extension function pointer -template -static ur_result_t getExtFuncFromContext(cl_context Context, - FuncPtrCache &FPtrCache, - const char *FuncName, T *Fptr) { - // TODO - // Potentially redo caching as UR interface changes. - // if cached, return cached FuncPtr - std::lock_guard CacheLock{FPtrCache.Mutex}; - std::map &FPtrMap = FPtrCache.Map; - auto It = FPtrMap.find(Context); - if (It != FPtrMap.end()) { - auto F = It->second; - // if cached that extension is not available return nullptr and - // UR_RESULT_ERROR_INVALID_VALUE - *Fptr = F; - return F ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_INVALID_VALUE; - } - - cl_uint DeviceCount; - cl_int RetErr = clGetContextInfo(Context, CL_CONTEXT_NUM_DEVICES, - sizeof(cl_uint), &DeviceCount, nullptr); - - if (RetErr != CL_SUCCESS || DeviceCount < 1) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - std::vector DevicesInCtx(DeviceCount); - RetErr = clGetContextInfo(Context, CL_CONTEXT_DEVICES, - DeviceCount * sizeof(cl_device_id), - DevicesInCtx.data(), nullptr); - - if (RetErr != CL_SUCCESS) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - cl_platform_id CurPlatform; - RetErr = clGetDeviceInfo(DevicesInCtx[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, nullptr); - - if (RetErr != CL_SUCCESS) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - T FuncPtr = reinterpret_cast( - clGetExtensionFunctionAddressForPlatform(CurPlatform, FuncName)); - - if (!FuncPtr) { - // Cache that the extension is not available - FPtrMap[Context] = nullptr; - return UR_RESULT_ERROR_INVALID_VALUE; - } - - *Fptr = FuncPtr; - FPtrMap[Context] = FuncPtr; - - return UR_RESULT_SUCCESS; -} } // namespace cl_ext ur_result_t mapCLErrorToUR(cl_int Result); diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index fc7dc144e3..75e193b6ed 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -14,59 +14,31 @@ #include #include -ur_result_t cl_adapter::getDevicesFromContext( - ur_context_handle_t hContext, - std::unique_ptr> &DevicesInCtx) { - - cl_uint DeviceCount; - CL_RETURN_ON_FAILURE(clGetContextInfo(cl_adapter::cast(hContext), - CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), - &DeviceCount, nullptr)); - - if (DeviceCount < 1) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - DevicesInCtx = std::make_unique>(DeviceCount); - - CL_RETURN_ON_FAILURE(clGetContextInfo( - cl_adapter::cast(hContext), CL_CONTEXT_DEVICES, - DeviceCount * sizeof(cl_device_id), (*DevicesInCtx).data(), nullptr)); - - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, const ur_device_handle_t *phDevices, const ur_context_properties_t *, ur_context_handle_t *phContext) { cl_int Ret; - *phContext = cl_adapter::cast( - clCreateContext(nullptr, cl_adapter::cast(DeviceCount), - cl_adapter::cast(phDevices), - nullptr, nullptr, cl_adapter::cast(&Ret))); - - return mapCLErrorToUR(Ret); -} + std::vector CLDevices(DeviceCount); + for (size_t i = 0; i < DeviceCount; i++) { + CLDevices[i] = phDevices[i]->get(); + } -static cl_int mapURContextInfoToCL(ur_context_info_t URPropName) { - - cl_int CLPropName; - switch (URPropName) { - case UR_CONTEXT_INFO_NUM_DEVICES: - CLPropName = CL_CONTEXT_NUM_DEVICES; - break; - case UR_CONTEXT_INFO_DEVICES: - CLPropName = CL_CONTEXT_DEVICES; - break; - case UR_CONTEXT_INFO_REFERENCE_COUNT: - CLPropName = CL_CONTEXT_REFERENCE_COUNT; - break; - default: - CLPropName = -1; + try { + cl_context Ctx = clCreateContext( + nullptr, cl_adapter::cast(DeviceCount), CLDevices.data(), + nullptr, nullptr, cl_adapter::cast(&Ret)); + CL_RETURN_ON_FAILURE(Ret); + auto URContext = + std::make_unique(Ctx, DeviceCount, phDevices); + *phContext = URContext.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } - return CLPropName; + return mapCLErrorToUR(Ret); } UR_APIEXPORT ur_result_t UR_APICALL @@ -74,7 +46,6 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - const cl_int CLPropName = mapURContextInfoToCL(propName); switch (static_cast(propName)) { /* 2D USM memops are not supported. */ @@ -90,21 +61,14 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, * queries of each device separately and building the intersection set. */ return UR_RESULT_ERROR_INVALID_ARGUMENT; } - case UR_CONTEXT_INFO_NUM_DEVICES: - case UR_CONTEXT_INFO_DEVICES: + case UR_CONTEXT_INFO_NUM_DEVICES: { + return ReturnValue(hContext->DeviceCount); + } + case UR_CONTEXT_INFO_DEVICES: { + return ReturnValue(&hContext->Devices[0], hContext->DeviceCount); + } case UR_CONTEXT_INFO_REFERENCE_COUNT: { - size_t CheckPropSize = 0; - auto ClResult = - clGetContextInfo(cl_adapter::cast(hContext), CLPropName, - propSize, pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; - } - return UR_RESULT_SUCCESS; + return ReturnValue(hContext->getReferenceCount()); } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -113,34 +77,42 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { - - cl_int Ret = clReleaseContext(cl_adapter::cast(hContext)); - return mapCLErrorToUR(Ret); + if (hContext->decrementReferenceCount() == 0) { + delete hContext; + } else { + CL_RETURN_ON_FAILURE(clReleaseContext(hContext->get())); + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { - - cl_int Ret = clRetainContext(cl_adapter::cast(hContext)); - return mapCLErrorToUR(Ret); + CL_RETURN_ON_FAILURE(clRetainContext(hContext->get())); + hContext->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { - *phNativeContext = reinterpret_cast(hContext); + *phNativeContext = reinterpret_cast(hContext->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( - ur_native_handle_t hNativeContext, uint32_t, const ur_device_handle_t *, + ur_native_handle_t hNativeContext, uint32_t numDevices, + const ur_device_handle_t *phDevices, const ur_context_native_properties_t *pProperties, ur_context_handle_t *phContext) { - *phContext = reinterpret_cast(hNativeContext); + cl_context NativeHandle = reinterpret_cast(hNativeContext); + UR_RETURN_ON_FAILURE(ur_context_handle_t_::makeWithNative( + NativeHandle, numDevices, phDevices, *phContext)); + if (!pProperties || !pProperties->isNativeHandleOwned) { - return urContextRetain(*phContext); + CL_RETURN_ON_FAILURE(clRetainContext(NativeHandle)); } + return UR_RESULT_SUCCESS; } @@ -190,8 +162,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( auto *C = static_cast(pUserData); C->execute(); }; - CL_RETURN_ON_FAILURE(clSetContextDestructorCallback( - cl_adapter::cast(hContext), ClCallback, Callback)); + CL_RETURN_ON_FAILURE( + clSetContextDestructorCallback(hContext->get(), ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 5319f68b55..cc537b2c8f 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -10,9 +10,78 @@ #pragma once #include "common.hpp" +#include "device.hpp" -namespace cl_adapter { -ur_result_t -getDevicesFromContext(ur_context_handle_t hContext, - std::unique_ptr> &DevicesInCtx); -} +#include + +struct ur_context_handle_t_ { + using native_type = cl_context; + native_type Context; + std::vector Devices; + uint32_t DeviceCount; + std::atomic RefCount = 0; + + ur_context_handle_t_(native_type Ctx, uint32_t DevCount, + const ur_device_handle_t *phDevices) + : Context(Ctx), DeviceCount(DevCount) { + for (uint32_t i = 0; i < DeviceCount; i++) { + Devices.emplace_back(phDevices[i]); + urDeviceRetain(phDevices[i]); + } + RefCount = 1; + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount, + const ur_device_handle_t *phDevices, + ur_context_handle_t &Context) { + if (!phDevices) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + try { + uint32_t CLDeviceCount; + CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_NUM_DEVICES, + sizeof(CLDeviceCount), + &CLDeviceCount, nullptr)); + std::vector CLDevices(CLDeviceCount); + CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_DEVICES, + sizeof(CLDevices), CLDevices.data(), + nullptr)); + if (DevCount != CLDeviceCount) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + for (uint32_t i = 0; i < DevCount; i++) { + if (phDevices[i]->get() != CLDevices[i]) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + } + auto URContext = + std::make_unique(Ctx, DevCount, phDevices); + Context = URContext.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; + } + + ~ur_context_handle_t_() { + for (uint32_t i = 0; i < DeviceCount; i++) { + urDeviceRelease(Devices[i]); + } + clReleaseContext(Context); + } + + native_type get() { return Context; } + + ur_platform_handle_t getPlatform() { return Devices[0]->Platform; } + + const std::vector &getDevices() { return Devices; } +}; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 115b9b2e09..62ae68b7cc 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -12,51 +12,10 @@ #include -ur_result_t cl_adapter::getDeviceVersion(cl_device_id Dev, - oclv::OpenCLVersion &Version) { - - size_t DevVerSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); - - std::string DevVer(DevVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_VERSION, DevVerSize, - DevVer.data(), nullptr)); - - Version = oclv::OpenCLVersion(DevVer); - if (!Version.isValid()) { - return UR_RESULT_ERROR_INVALID_DEVICE; - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t cl_adapter::checkDeviceExtensions( - cl_device_id Dev, const std::vector &Exts, bool &Supported) { - size_t ExtSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); - - std::string ExtStr(ExtSize, '\0'); - - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, - ExtStr.data(), nullptr)); - - Supported = true; - for (const std::string &Ext : Exts) { - if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { - break; - } - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, - ur_device_type_t DeviceType, - uint32_t NumEntries, - ur_device_handle_t *phDevices, - uint32_t *pNumDevices) { +UR_APIEXPORT ur_result_t UR_APICALL +urDeviceGet(ur_platform_handle_t hPlatform, ur_device_type_t DeviceType, + [[maybe_unused]] uint32_t NumEntries, ur_device_handle_t *phDevices, + uint32_t *pNumDevices) { cl_device_type Type; switch (DeviceType) { @@ -75,26 +34,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, Type = CL_DEVICE_TYPE_ACCELERATOR; break; case UR_DEVICE_TYPE_DEFAULT: - Type = UR_DEVICE_TYPE_DEFAULT; + Type = CL_DEVICE_TYPE_DEFAULT; break; default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } - - cl_int Result = clGetDeviceIDs(cl_adapter::cast(hPlatform), - Type, cl_adapter::cast(NumEntries), - cl_adapter::cast(phDevices), - cl_adapter::cast(pNumDevices)); - - // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices - if (Result == CL_DEVICE_NOT_FOUND) { - Result = CL_SUCCESS; + UR_RETURN_ON_FAILURE(hPlatform->InitDevices()); + try { + uint32_t AllDevicesNum = hPlatform->Devices.size(); + uint32_t DeviceNumIter = 0; + for (uint32_t i = 0; i < AllDevicesNum; i++) { + cl_device_type DeviceType = hPlatform->Devices[i]->Type; + if (DeviceType == Type || Type == CL_DEVICE_TYPE_ALL) { + if (phDevices) { + phDevices[DeviceNumIter] = hPlatform->Devices[i].get(); + } + DeviceNumIter++; + } + } if (pNumDevices) { - *pNumDevices = 0; + *pNumDevices = DeviceNumIter; } - } - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; + } catch (ur_result_t Err) { + return Err; + } catch (...) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } } static ur_device_fp_capability_flags_t @@ -324,10 +291,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * to UR */ switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: { - cl_device_type CLType; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_device_type), &CLType, nullptr)); + cl_device_type CLType = hDevice->Type; /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not * enough information in the OpenCL runtime to know exactly which type it @@ -347,25 +311,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_DEVICE_ID: { bool Supported = false; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_pci_bus_info"}, - Supported)); + CL_RETURN_ON_FAILURE( + hDevice->checkDeviceExtensions({"cl_khr_pci_bus_info"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } cl_device_pci_bus_info_khr PciInfo = {}; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_PCI_BUS_INFO_KHR, - sizeof(PciInfo), &PciInfo, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), + CL_DEVICE_PCI_BUS_INFO_KHR, + sizeof(PciInfo), &PciInfo, nullptr)); return ReturnValue(PciInfo.pci_device); } case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { oclv::OpenCLVersion Version; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), Version)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(Version)); const std::string Results = std::to_string(Version.getMajor()) + "." + std::to_string(Version.getMinor()); @@ -374,14 +336,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, - nullptr, &CLSize)); + clGetDeviceInfo(hDevice->get(), CLPropName, 0, nullptr, &CLSize)); const size_t NProperties = CLSize / sizeof(cl_device_partition_property); std::vector CLValue(NProperties); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - CLSize, CLValue.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, CLSize, + CLValue.data(), nullptr)); /* The OpenCL implementation returns a value of 0 if no properties are * supported. UR will return a size of 0 for now. @@ -403,8 +363,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, - nullptr, &CLSize)); + clGetDeviceInfo(hDevice->get(), CLPropName, 0, nullptr, &CLSize)); const size_t NProperties = CLSize / sizeof(cl_device_partition_property); /* The OpenCL implementation returns either a size of 0 or a value of 0 if @@ -419,8 +378,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, auto CLValue = reinterpret_cast(alloca(CLSize)); CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - CLSize, CLValue, nullptr)); + clGetDeviceInfo(hDevice->get(), CLPropName, CLSize, CLValue, nullptr)); std::vector URValue(NProperties - 1); @@ -472,14 +430,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* Corresponding OpenCL query is only available starting with OpenCL 2.1 * and we have to emulate it on older OpenCL runtimes. */ oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); if (DevVer >= oclv::V2_1) { cl_uint CLValue; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, - sizeof(cl_uint), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), + CL_DEVICE_MAX_NUM_SUB_GROUPS, + sizeof(cl_uint), &CLValue, nullptr)); if (CLValue == 0u) { /* OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 @@ -501,9 +458,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_device_fp_config * UR type: ur_device_fp_capability_flags_t */ if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { - bool Supported; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_fp16"}, Supported)); + bool Supported = false; + CL_RETURN_ON_FAILURE( + hDevice->checkDeviceExtensions({"cl_khr_fp16"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -511,9 +468,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } cl_device_fp_config CLValue; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_device_fp_config), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, + sizeof(cl_device_fp_config), &CLValue, + nullptr)); return ReturnValue(mapCLDeviceFpConfigToUR(CLValue)); } @@ -522,8 +479,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* This query is missing before OpenCL 3.0. Check version and handle * appropriately */ oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); /* Minimum required capability to be returned. For OpenCL 1.2, this is all * that is required */ @@ -534,8 +490,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* For OpenCL >=3.0, the query should be implemented */ cl_device_atomic_capabilities CLCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + hDevice->get(), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); /* Mask operation to only consider atomic_memory_order* capabilities */ @@ -581,14 +536,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + hDevice->get(), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && @@ -634,14 +587,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + hDevice->get(), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && @@ -683,14 +634,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + hDevice->get(), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && @@ -736,8 +685,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_ATOMIC_64: { bool Supported = false; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + CL_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, Supported)); @@ -746,16 +694,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DevType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, + nullptr)); return ReturnValue(DevType == CL_DEVICE_TYPE_GPU); } case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { bool Supported = false; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + CL_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_mem_channel_property"}, Supported)); return ReturnValue(Supported); @@ -763,14 +710,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ESIMD_SUPPORT: { bool Supported = false; cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DevType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, + nullptr)); cl_uint VendorID = 0; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_VENDOR_ID, - sizeof(VendorID), &VendorID, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_VENDOR_ID, + sizeof(VendorID), &VendorID, nullptr)); /* ESIMD is only supported by Intel GPUs. */ Supported = DevType == CL_DEVICE_TYPE_GPU && VendorID == 0x8086; @@ -782,11 +728,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { bool Supported = false; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + CL_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_program_scope_host_pipe"}, Supported)); return ReturnValue(Supported); } + case UR_DEVICE_INFO_REFERENCE_COUNT: { + return ReturnValue(hDevice->getReferenceCount()); + } case UR_DEVICE_INFO_QUEUE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: @@ -803,9 +751,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_flags_t (uint32_t) */ cl_bitfield CLValue = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bitfield), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->get(), CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); /* We can just static_cast the output because OpenCL and UR bitfields * map 1 to 1 for these properties. cl_bitfield is uint64_t and ur_flags_t @@ -826,9 +773,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_bool_t */ cl_bool CLValue; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bool), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, + sizeof(cl_bool), &CLValue, nullptr)); /* cl_bool is uint32_t and ur_bool_t is bool */ return ReturnValue(static_cast(CLValue)); @@ -859,7 +805,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_SAMPLERS: case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: - case UR_DEVICE_INFO_REFERENCE_COUNT: case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: @@ -877,8 +822,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: - case UR_DEVICE_INFO_PLATFORM: - case UR_DEVICE_INFO_PARENT_DEVICE: case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_NAME: case UR_DEVICE_INFO_VENDOR: @@ -901,14 +844,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * | ur_device_handle_t | cl_device_id | 8 | */ - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - propSize, pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, propSize, + pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_PLATFORM: { + if (hDevice->Platform && hDevice->Platform->get()) { + return ReturnValue(hDevice->Platform); + } + return UR_RESULT_ERROR_INVALID_DEVICE; + } + case UR_DEVICE_INFO_PARENT_DEVICE: { + return ReturnValue(hDevice->ParentDevice); + } case UR_DEVICE_INFO_EXTENSIONS: { - cl_device_id Dev = cl_adapter::cast(hDevice); + cl_device_id Dev = hDevice->get(); size_t ExtSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); @@ -1017,9 +968,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CLProperties[CLProperties.size() - 1] = 0; cl_uint CLNumDevicesRet; - CL_RETURN_ON_FAILURE( - clCreateSubDevices(cl_adapter::cast(hDevice), - CLProperties.data(), 0, nullptr, &CLNumDevicesRet)); + CL_RETURN_ON_FAILURE(clCreateSubDevices(hDevice->get(), CLProperties.data(), + 0, nullptr, &CLNumDevicesRet)); if (pNumDevicesRet) { *pNumDevicesRet = CLNumDevicesRet; @@ -1029,63 +979,103 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( * function shall only retrieve that number of sub-devices. */ if (phSubDevices) { std::vector CLSubDevices(CLNumDevicesRet); - CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl_adapter::cast(hDevice), CLProperties.data(), - CLNumDevicesRet, CLSubDevices.data(), nullptr)); - - std::memcpy(phSubDevices, CLSubDevices.data(), - sizeof(cl_device_id) * NumDevices); + CL_RETURN_ON_FAILURE(clCreateSubDevices(hDevice->get(), CLProperties.data(), + CLNumDevicesRet, + CLSubDevices.data(), nullptr)); + for (uint32_t i = 0; i < std::min(CLNumDevicesRet, NumDevices); i++) { + try { + auto URSubDevice = std::make_unique( + CLSubDevices[i], hDevice->Platform, hDevice); + phSubDevices[i] = URSubDevice.release(); + } catch (std::bad_alloc &) { + // Delete all the successfully created subdevices before the failed one. + for (uint32_t j = 0; j < i; j++) { + delete phSubDevices[j]; + } + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + // Delete all the successfully created subdevices before the failed one. + for (uint32_t j = 0; j < i; j++) { + delete phSubDevices[j]; + } + return UR_RESULT_ERROR_UNKNOWN; + } + } } return UR_RESULT_SUCCESS; } +// Root devices ref count are unchanged through out the program lifetime. UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { + if (hDevice->ParentDevice) { + CL_RETURN_ON_FAILURE(clRetainDevice(hDevice->get())); + hDevice->incrementReferenceCount(); + } - cl_int Result = clRetainDevice(cl_adapter::cast(hDevice)); - - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; } +// Root devices ref count are unchanged through out the program lifetime. UR_APIEXPORT ur_result_t UR_APICALL urDeviceRelease(ur_device_handle_t hDevice) { - - cl_int Result = clReleaseDevice(cl_adapter::cast(hDevice)); - - return mapCLErrorToUR(Result); + if (hDevice->ParentDevice) { + if (hDevice->decrementReferenceCount() == 0) { + delete hDevice; + } else { + CL_RETURN_ON_FAILURE(clReleaseDevice(hDevice->get())); + } + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( ur_device_handle_t hDevice, ur_native_handle_t *phNativeDevice) { - *phNativeDevice = reinterpret_cast(hDevice); + *phNativeDevice = reinterpret_cast(hDevice->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( ur_native_handle_t hNativeDevice, ur_platform_handle_t, const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { - - *phDevice = reinterpret_cast(hNativeDevice); - return UR_RESULT_SUCCESS; + cl_device_id NativeHandle = reinterpret_cast(hNativeDevice); + + uint32_t NumPlatforms = 0; + UR_RETURN_ON_FAILURE(urPlatformGet(nullptr, 0, 0, nullptr, &NumPlatforms)); + std::vector Platforms(NumPlatforms); + UR_RETURN_ON_FAILURE( + urPlatformGet(nullptr, 0, NumPlatforms, Platforms.data(), nullptr)); + + for (uint32_t i = 0; i < NumPlatforms; i++) { + uint32_t NumDevices = 0; + UR_RETURN_ON_FAILURE( + urDeviceGet(Platforms[i], UR_DEVICE_TYPE_ALL, 0, nullptr, &NumDevices)); + std::vector Devices(NumDevices); + UR_RETURN_ON_FAILURE(urDeviceGet(Platforms[i], UR_DEVICE_TYPE_ALL, + NumDevices, Devices.data(), nullptr)); + + for (auto &Device : Devices) { + if (Device->get() == NativeHandle) { + *phDevice = Device; + return UR_RESULT_SUCCESS; + } + } + } + return UR_RESULT_ERROR_INVALID_DEVICE; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t *pHostTimestamp) { oclv::OpenCLVersion DevVer, PlatVer; - cl_platform_id Platform; - cl_device_id DeviceId = cl_adapter::cast(hDevice); + cl_device_id DeviceId = hDevice->get(); // TODO: Cache OpenCL version for each device and platform - auto RetErr = clGetDeviceInfo(DeviceId, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &Platform, nullptr); - CL_RETURN_ON_FAILURE(RetErr); - - RetErr = cl_adapter::getDeviceVersion(DeviceId, DevVer); + auto RetErr = hDevice->getDeviceVersion(DevVer); CL_RETURN_ON_FAILURE(RetErr); - RetErr = cl_adapter::getPlatformVersion(Platform, PlatVer); + RetErr = hDevice->Platform->getPlatformVersion(PlatVer); if (PlatVer < oclv::V2_1 || DevVer < oclv::V2_1) { return UR_RESULT_ERROR_INVALID_OPERATION; @@ -1125,9 +1115,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( // Get the type of the device cl_device_type DeviceType; constexpr uint32_t InvalidInd = std::numeric_limits::max(); - cl_int RetErr = - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DeviceType, nullptr); + cl_int RetErr = clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DeviceType, nullptr); if (RetErr != CL_SUCCESS) { *pSelectedBinary = InvalidInd; CL_RETURN_ON_FAILURE(RetErr); diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 548a5012f9..b1cd437e8f 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -11,10 +11,71 @@ #include "common.hpp" -namespace cl_adapter { -ur_result_t getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version); +struct ur_device_handle_t_ { + using native_type = cl_device_id; + native_type Device; + ur_platform_handle_t Platform; + cl_device_type Type = 0; + ur_device_handle_t ParentDevice = nullptr; + std::atomic RefCount = 0; -ur_result_t checkDeviceExtensions(cl_device_id Dev, - const std::vector &Exts, - bool &Supported); -} // namespace cl_adapter + ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat, + ur_device_handle_t Parent) + : Device(Dev), Platform(Plat), ParentDevice(Parent) { + RefCount = 1; + if (Parent) { + Type = Parent->Type; + } else { + clGetDeviceInfo(Device, CL_DEVICE_TYPE, sizeof(cl_device_type), &Type, + nullptr); + } + } + + ~ur_device_handle_t_() {} + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + native_type get() { return Device; } + + ur_result_t getDeviceVersion(oclv::OpenCLVersion &Version) { + size_t DevVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Device, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); + + std::string DevVer(DevVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Device, CL_DEVICE_VERSION, DevVerSize, + DevVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(DevVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_DEVICE; + } + + return UR_RESULT_SUCCESS; + } + + ur_result_t checkDeviceExtensions(const std::vector &Exts, + bool &Supported) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Device, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Device, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + Supported = true; + for (const std::string &Ext : Exts) { + if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + break; + } + } + + return UR_RESULT_SUCCESS; + } +}; diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 6830a28eec..1f9320cace 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -9,6 +9,13 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include "context.hpp" +#include "event.hpp" +#include "kernel.hpp" +#include "memory.hpp" +#include "platform.hpp" +#include "program.hpp" +#include "queue.hpp" cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { cl_map_flags CLFlags = 0; @@ -30,38 +37,74 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - - CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( - cl_adapter::cast(hQueue), - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE( + clEnqueueNDRangeKernel(hQueue->get(), hKernel->get(), workDim, + pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -69,13 +112,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBuffer->get(), blockingRead, offset, size, pDst, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -83,13 +138,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBuffer->get(), blockingWrite, offset, size, pSrc, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -104,15 +171,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( bufferOrigin.z}; const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingRead, BufferOrigin, HostOrigin, + hQueue->get(), hBuffer->get(), blockingRead, BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, - pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -127,15 +205,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( bufferOrigin.z}; const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingWrite, BufferOrigin, - HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, - hostSlicePitch, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBuffer->get(), blockingWrite, BufferOrigin, HostOrigin, + Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, + pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -144,14 +233,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBufferSrc), - cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBufferSrc->get(), hBufferDst->get(), srcOffset, dstOffset, + size, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -165,15 +265,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( const size_t SrcOrigin[3] = {srcOrigin.x, srcOrigin.y, srcOrigin.z}; const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBufferSrc), - cl_adapter::cast(hBufferDst), SrcOrigin, DstOrigin, Region, - srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBufferSrc->get(), hBufferDst->get(), SrcOrigin, DstOrigin, + Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -185,12 +296,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( // CL FillBuffer only allows pattern sizes up to the largest CL type: // long16/double16 if (patternSize <= 128) { - CL_RETURN_ON_FAILURE( - clEnqueueFillBuffer(cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, - patternSize, offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( + hQueue->get(), hBuffer->get(), pPattern, patternSize, offset, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -202,11 +326,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( } cl_event WriteEvent = nullptr; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } auto ClErr = clEnqueueWriteBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), false, offset, size, HostBuffer, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - &WriteEvent); + hQueue->get(), hBuffer->get(), false, offset, size, HostBuffer, + numEventsInWaitList, CLWaitEvents.data(), &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; CL_RETURN_ON_FAILURE(ClErr); @@ -227,7 +353,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( } if (phEvent) { - *phEvent = cl_adapter::cast(WriteEvent); + try { + auto UREvent = std::make_unique( + WriteEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } else { CL_RETURN_ON_FAILURE(clReleaseEvent(WriteEvent)); } @@ -242,14 +376,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueReadImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImage), blockingRead, Origin, Region, rowPitch, - slicePitch, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hImage->get(), blockingRead, Origin, Region, rowPitch, + slicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -260,14 +405,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueWriteImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImage), blockingWrite, Origin, Region, rowPitch, - slicePitch, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hImage->get(), blockingWrite, Origin, Region, rowPitch, + slicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -280,14 +436,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( const size_t SrcOrigin[3] = {srcOrigin.x, srcOrigin.y, srcOrigin.z}; const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueCopyImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), - SrcOrigin, DstOrigin, Region, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hImageSrc->get(), hImageDst->get(), SrcOrigin, DstOrigin, + Region, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -296,15 +463,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, void **ppRetMap) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } cl_int Err; - *ppRetMap = clEnqueueMapBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingMap, - convertURMapFlagsToCL(mapFlags), offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent), &Err); - + *ppRetMap = clEnqueueMapBuffer(hQueue->get(), hBuffer->get(), blockingMap, + convertURMapFlagsToCL(mapFlags), offset, size, + numEventsInWaitList, CLWaitEvents.data(), + &Event, &Err); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return mapCLErrorToUR(Err); } @@ -312,13 +491,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - - CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject( - cl_adapter::cast(hQueue), - cl_adapter::cast(hMem), pMappedPtr, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject(hQueue->get(), hMem->get(), + pMappedPtr, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } @@ -328,28 +519,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return mapCLErrorToUR(Res); - - cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; - Res = cl_ext::getExtFuncFromContext( - Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, - cl_ext::EnqueueWriteGlobalVariableName, &F); - - if (!F || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hQueue->getPlatform(); - Res = F(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), name, blockingWrite, count, - offset, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_ext::clEnqueueWriteGlobalVariable_fn clEnqueueWriteGlobalVariable = + Platform->ExtFuncPtr->clEnqueueWriteGlobalVariableCache; + UR_RETURN_ON_FAILURE( + Platform->getExtFunc(&clEnqueueWriteGlobalVariable, + cl_ext::EnqueueWriteGlobalVariableName, "")); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + cl_int Res = clEnqueueWriteGlobalVariable( + hQueue->get(), hProgram->get(), name, blockingWrite, count, offset, pSrc, + numEventsInWaitList, CLWaitEvents.data(), &Event); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return mapCLErrorToUR(Res); } @@ -359,28 +555,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return mapCLErrorToUR(Res); - - cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; - Res = cl_ext::getExtFuncFromContext( - Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, - cl_ext::EnqueueReadGlobalVariableName, &F); + ur_platform_handle_t Platform = hQueue->getPlatform(); - if (!F || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; - - Res = F(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), name, blockingRead, count, - offset, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_ext::clEnqueueReadGlobalVariable_fn clEnqueueReadGlobalVariable = + Platform->ExtFuncPtr->clEnqueueReadGlobalVariableCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &clEnqueueReadGlobalVariable, cl_ext::EnqueueReadGlobalVariableName, "")); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + cl_int Res = clEnqueueReadGlobalVariable( + hQueue->get(), hProgram->get(), name, blockingRead, count, offset, pDst, + numEventsInWaitList, CLWaitEvents.data(), &Event); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return mapCLErrorToUR(Res); } @@ -390,27 +590,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } + ur_platform_handle_t Platform = hQueue->getPlatform(); - cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, - cl_ext::EnqueueReadHostPipeName, &FuncPtr); + cl_ext::clEnqueueReadHostPipeINTEL_fn clEnqueueReadHostPipe = + Platform->ExtFuncPtr->clEnqueueReadHostPipeINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &clEnqueueReadHostPipe, cl_ext::EnqueueReadHostPipeName, + "cl_intel_program_scope_host_pipe")); - if (FuncPtr) { - RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), pipe_symbol, blocking, - pDst, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + ur_result_t RetVal = mapCLErrorToUR(clEnqueueReadHostPipe( + hQueue->get(), hProgram->get(), pipe_symbol, blocking, pDst, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return RetVal; @@ -422,27 +627,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } - - cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, - cl_ext::EnqueueWriteHostPipeName, &FuncPtr); - - if (FuncPtr) { - RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), pipe_symbol, blocking, - pSrc, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + ur_platform_handle_t Platform = hQueue->getPlatform(); + + cl_ext::clEnqueueWriteHostPipeINTEL_fn clEnqueueWriteHostPipe = + Platform->ExtFuncPtr->clEnqueueWriteHostPipeINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &clEnqueueWriteHostPipe, cl_ext::EnqueueWriteHostPipeName, + "cl_intel_program_scope_host_pipe")); + + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + ur_result_t RetVal = mapCLErrorToUR(clEnqueueWriteHostPipe( + hQueue->get(), hProgram->get(), pipe_symbol, blocking, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return RetVal; diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index d180cfb097..018574c1ea 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "event.hpp" #include "common.hpp" #include @@ -109,39 +110,54 @@ ur_command_t convertCLCommandTypeToUR(const cl_command_type &CommandType) { } } -UR_APIEXPORT ur_result_t UR_APICALL -urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, - [[maybe_unused]] ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { - *phEvent = reinterpret_cast(hNativeEvent); +UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( + ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { + cl_event NativeHandle = reinterpret_cast(hNativeEvent); + try { + auto UREvent = + std::make_unique(NativeHandle, hContext, nullptr); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + if (!pProperties || !pProperties->isNativeHandleOwned) { - return urEventRetain(*phEvent); + CL_RETURN_ON_FAILURE(clRetainEvent(NativeHandle)); } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { - return getNativeHandle(hEvent, phNativeEvent); + return getNativeHandle(hEvent->get(), phNativeEvent); } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { - cl_int RetErr = clReleaseEvent(cl_adapter::cast(hEvent)); - CL_RETURN_ON_FAILURE(RetErr); + if (hEvent->decrementReferenceCount() == 0) { + delete hEvent; + } else { + CL_RETURN_ON_FAILURE(clReleaseEvent(hEvent->get())); + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - cl_int RetErr = clRetainEvent(cl_adapter::cast(hEvent)); - CL_RETURN_ON_FAILURE(RetErr); + CL_RETURN_ON_FAILURE(clRetainEvent(hEvent->get())); + hEvent->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { - cl_int RetErr = clWaitForEvents( - numEvents, cl_adapter::cast(phEventWaitList)); + std::vector CLEvents(numEvents); + for (uint32_t i = 0; i < numEvents; i++) { + CLEvents[i] = phEventWaitList[i]->get(); + } + cl_int RetErr = clWaitForEvents(numEvents, CLEvents.data()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -152,38 +168,53 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, void *pPropValue, size_t *pPropSizeRet) { cl_event_info CLEventInfo = convertUREventInfoToCL(propName); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - size_t CheckPropSize = 0; - cl_int RetErr = - clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, - pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + switch (propName) { + case UR_EVENT_INFO_CONTEXT: { + return ReturnValue(hEvent->Context); } - CL_RETURN_ON_FAILURE(RetErr); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + case UR_EVENT_INFO_COMMAND_QUEUE: { + return ReturnValue(hEvent->Queue); + } + case UR_EVENT_INFO_REFERENCE_COUNT: { + return ReturnValue(hEvent->getReferenceCount()); } + default: { + size_t CheckPropSize = 0; + cl_int RetErr = clGetEventInfo(hEvent->get(), CLEventInfo, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(RetErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } - if (pPropValue) { - if (propName == UR_EVENT_INFO_COMMAND_TYPE) { - *reinterpret_cast(pPropValue) = convertCLCommandTypeToUR( - *reinterpret_cast(pPropValue)); - } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, - * change it to CL_SUBMITTED. sycl::info::event::event_command_status has - * no equivalent to CL_QUEUED. - * - * FIXME UR Port: This should not be part of the UR adapter. Since - * PI_QUEUED exists, SYCL RT should be changed to handle this situation. - * In addition, SYCL RT is relying on PI_QUEUED status to make sure that - * the queues are flushed. */ - const auto param_value_int = static_cast(pPropValue); - if (*param_value_int == UR_EVENT_STATUS_QUEUED) { - *param_value_int = UR_EVENT_STATUS_SUBMITTED; + if (pPropValue) { + if (propName == UR_EVENT_INFO_COMMAND_TYPE) { + *reinterpret_cast(pPropValue) = + convertCLCommandTypeToUR( + *reinterpret_cast(pPropValue)); + } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, + * change it to CL_SUBMITTED. sycl::info::event::event_command_status + * has no equivalent to CL_QUEUED. + * + * FIXME UR Port: This should not be part of the UR adapter. Since + * PI_QUEUED exists, SYCL RT should be changed to handle this situation. + * In addition, SYCL RT is relying on PI_QUEUED status to make sure that + * the queues are flushed. */ + const auto param_value_int = + static_cast(pPropValue); + if (*param_value_int == UR_EVENT_STATUS_QUEUED) { + *param_value_int = UR_EVENT_STATUS_SUBMITTED; + } } } } + } return UR_RESULT_SUCCESS; } @@ -192,9 +223,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { cl_profiling_info CLProfilingInfo = convertURProfilingInfoToCL(propName); - cl_int RetErr = clGetEventProfilingInfo(cl_adapter::cast(hEvent), - CLProfilingInfo, propSize, pPropValue, - pPropSizeRet); + cl_int RetErr = clGetEventProfilingInfo(hEvent->get(), CLProfilingInfo, + propSize, pPropValue, pPropSizeRet); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -259,7 +289,7 @@ urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, auto *C = static_cast(pUserData); C->execute(); }; - CL_RETURN_ON_FAILURE(clSetEventCallback(cl_adapter::cast(hEvent), - CallbackType, ClCallback, Callback)); + CL_RETURN_ON_FAILURE( + clSetEventCallback(hEvent->get(), CallbackType, ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/event.hpp b/source/adapters/opencl/event.hpp new file mode 100644 index 0000000000..f7f17e7e1e --- /dev/null +++ b/source/adapters/opencl/event.hpp @@ -0,0 +1,48 @@ +//===--------- queue.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_event_handle_t_ { + using native_type = cl_event; + native_type Event; + ur_context_handle_t Context; + ur_queue_handle_t Queue; + std::atomic RefCount = 0; + + ur_event_handle_t_(native_type Event, ur_context_handle_t Ctx, + ur_queue_handle_t Queue) + : Event(Event), Context(Ctx), Queue(Queue) { + RefCount = 1; + urContextRetain(Context); + if (Queue) { + urQueueRetain(Queue); + } + } + + ~ur_event_handle_t_() { + urContextRelease(Context); + if (Queue) { + urQueueRelease(Queue); + } + clReleaseEvent(Event); + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + native_type get() { return Event; } +}; diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 44157b826b..194824316f 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -7,7 +7,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "kernel.hpp" #include "common.hpp" +#include "device.hpp" +#include "memory.hpp" +#include "platform.hpp" +#include "program.hpp" +#include "sampler.hpp" #include #include @@ -15,11 +21,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { + try { + cl_int CLResult; + cl_kernel Kernel = clCreateKernel(hProgram->get(), pKernelName, &CLResult); + CL_RETURN_ON_FAILURE(CLResult); + auto URKernel = std::make_unique(Kernel, hProgram, + hProgram->Context); + *phKernel = URKernel.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } - cl_int CLResult; - *phKernel = cl_adapter::cast(clCreateKernel( - cl_adapter::cast(hProgram), pKernelName, &CLResult)); - CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; } @@ -27,9 +41,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_value_properties_t *, const void *pArgValue) { - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - argSize, pArgValue)); + CL_RETURN_ON_FAILURE(clSetKernelArg( + hKernel->get(), cl_adapter::cast(argIndex), argSize, pArgValue)); return UR_RESULT_SUCCESS; } @@ -38,9 +51,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_local_properties_t *) { - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - argSize, nullptr)); + CL_RETURN_ON_FAILURE(clSetKernelArg( + hKernel->get(), cl_adapter::cast(argIndex), argSize, nullptr)); return UR_RESULT_SUCCESS; } @@ -72,14 +84,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); // We need this little bit of ugliness because the UR NUM_ARGS property is // size_t whereas the CL one is cl_uint. We should consider changing that see // #1038 - if (propName == UR_KERNEL_INFO_NUM_ARGS) { + switch (propName) { + case UR_KERNEL_INFO_NUM_ARGS: { if (pPropSizeRet) *pPropSizeRet = sizeof(size_t); cl_uint NumArgs = 0; - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), sizeof(NumArgs), &NumArgs, nullptr)); if (pPropValue) { @@ -87,11 +101,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, return UR_RESULT_ERROR_INVALID_SIZE; *static_cast(pPropValue) = static_cast(NumArgs); } - } else { + return UR_RESULT_SUCCESS; + } + case UR_KERNEL_INFO_PROGRAM: { + return ReturnValue(hKernel->Program); + } + case UR_KERNEL_INFO_CONTEXT: { + return ReturnValue(hKernel->Context); + } + case UR_KERNEL_INFO_REFERENCE_COUNT: { + return ReturnValue(hKernel->getReferenceCount()); + } + default: { size_t CheckPropSize = 0; - cl_int ClResult = clGetKernelInfo(cl_adapter::cast(hKernel), - mapURKernelInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); + cl_int ClResult = + clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -100,6 +125,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, *pPropSizeRet = CheckPropSize; } } + } return UR_RESULT_SUCCESS; } @@ -135,17 +161,16 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, // to deter naive use of the query. if (propName == UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) { cl_device_type ClDeviceType; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(ClDeviceType), &ClDeviceType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + sizeof(ClDeviceType), &ClDeviceType, + nullptr)); if (ClDeviceType != CL_DEVICE_TYPE_CUSTOM) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } } CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); + hKernel->get(), hDevice->get(), mapURKernelGroupInfoToCL(propName), + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -197,11 +222,9 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, InputValueSize = MaxDims * sizeof(size_t); } - cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - mapURKernelSubGroupInfoToCL(propName), - InputValueSize, InputValue.get(), - sizeof(size_t), &RetVal, pPropSizeRet); + cl_int Ret = clGetKernelSubGroupInfo( + hKernel->get(), hDevice->get(), mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), sizeof(size_t), &RetVal, pPropSizeRet); if (Ret == CL_INVALID_OPERATION) { // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does @@ -250,13 +273,18 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clRetainKernel(cl_adapter::cast(hKernel))); + CL_RETURN_ON_FAILURE(clRetainKernel(hKernel->get())); + hKernel->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clReleaseKernel(cl_adapter::cast(hKernel))); + if (hKernel->decrementReferenceCount() == 0) { + delete hKernel; + } else { + CL_RETURN_ON_FAILURE(clReleaseKernel(hKernel->get())); + } return UR_RESULT_SUCCESS; } @@ -267,48 +295,41 @@ urKernelRelease(ur_kernel_handle_t hKernel) { static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_bool TrueVal = CL_TRUE; - clHostMemAllocINTEL_fn HFunc = nullptr; - clSharedMemAllocINTEL_fn SFunc = nullptr; - clDeviceMemAllocINTEL_fn DFunc = nullptr; - cl_context CLContext; - + ur_platform_handle_t Platform = hKernel->getPlatform(); /* We test that each alloc type is supported before we actually try to set * KernelExecInfo. */ - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - CL_KERNEL_CONTEXT, sizeof(cl_context), - &CLContext, nullptr)); - - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::HostMemAllocName, &HFunc)); - - if (HFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + clHostMemAllocINTEL_fn clHostMemAlloc = + Platform->ExtFuncPtr->clHostMemAllocINTELCache; + ur_result_t Res = + Platform->getExtFunc(&clHostMemAlloc, cl_ext::HostMemAllocName, + "cl_intel_unified_shared_memory"); + + if (Res == UR_RESULT_SUCCESS) { + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, - cl_ext::DeviceMemAllocName, &DFunc)); + clDeviceMemAllocINTEL_fn clDeviceMemAlloc = + Platform->ExtFuncPtr->clDeviceMemAllocINTELCache; + Res = Platform->getExtFunc(&clDeviceMemAlloc, cl_ext::DeviceMemAllocName, + "cl_intel_unified_shared_memory"); - if (DFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + if (Res == UR_RESULT_SUCCESS) { + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, - cl_ext::SharedMemAllocName, &SFunc)); + clSharedMemAllocINTEL_fn clSharedMemAlloc = + Platform->ExtFuncPtr->clSharedMemAllocINTELCache; + Res = Platform->getExtFunc(&clSharedMemAlloc, cl_ext::SharedMemAllocName, + "cl_intel_unified_shared_memory"); - if (SFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + if (Res == UR_RESULT_SUCCESS) { + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } return UR_RESULT_SUCCESS; } @@ -330,9 +351,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_USM_PTRS: { - CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, propSize, pPropValue)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo(hKernel->get(), + CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, + propSize, pPropValue)); return UR_RESULT_SUCCESS; } default: { @@ -346,43 +367,49 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - CL_KERNEL_CONTEXT, sizeof(cl_context), - &CLContext, nullptr)); - - clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; - UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, - cl_ext::ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, - cl_ext::SetKernelArgMemPointerName, &FuncPtr)); - - if (FuncPtr) { + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), CL_KERNEL_CONTEXT, + sizeof(cl_context), &CLContext, + nullptr)); + + ur_platform_handle_t Platform = hKernel->getPlatform(); + + clSetKernelArgMemPointerINTEL_fn clSetKernelArgMemPointer = + Platform->ExtFuncPtr->clSetKernelArgMemPointerINTELCache; + ur_result_t Res = Platform->getExtFunc(&clSetKernelArgMemPointer, + cl_ext::SetKernelArgMemPointerName, + "cl_intel_unified_shared_memory"); + + if (Res == UR_RESULT_SUCCESS) { /* OpenCL passes pointers by value not by reference. This means we need to * deref the arg to get the pointer value */ auto PtrToPtr = reinterpret_cast(pArgValue); auto DerefPtr = reinterpret_cast(*PtrToPtr); - CL_RETURN_ON_FAILURE(FuncPtr(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - DerefPtr)); + CL_RETURN_ON_FAILURE(clSetKernelArgMemPointer( + hKernel->get(), cl_adapter::cast(argIndex), DerefPtr)); } return UR_RESULT_SUCCESS; } + UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { - *phNativeKernel = reinterpret_cast(hKernel); + *phNativeKernel = reinterpret_cast(hKernel->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, - const ur_kernel_native_properties_t *pProperties, + ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, + ur_program_handle_t hProgram, + [[maybe_unused]] const ur_kernel_native_properties_t *pProperties, ur_kernel_handle_t *phKernel) { - *phKernel = reinterpret_cast(hNativeKernel); + cl_kernel NativeHandle = reinterpret_cast(hNativeKernel); + + UR_RETURN_ON_FAILURE(ur_kernel_handle_t_::makeWithNative( + NativeHandle, hProgram, hContext, *phKernel)); + if (!pProperties || !pProperties->isNativeHandleOwned) { - return urKernelRetain(*phKernel); + CL_RETURN_ON_FAILURE(clRetainKernel(NativeHandle)); } return UR_RESULT_SUCCESS; } @@ -391,10 +418,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { - cl_int RetErr = clSetKernelArg( - cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), - sizeof(hArgValue), cl_adapter::cast(&hArgValue)); - CL_RETURN_ON_FAILURE(RetErr); + cl_mem CLArgValue = hArgValue ? hArgValue->get() : nullptr; + CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->get(), + cl_adapter::cast(argIndex), + sizeof(CLArgValue), &CLArgValue)); return UR_RESULT_SUCCESS; } @@ -402,9 +429,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { - cl_int RetErr = clSetKernelArg( - cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), - sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + cl_sampler CLArgSampler = hArgValue->get(); + cl_int RetErr = + clSetKernelArg(hKernel->get(), cl_adapter::cast(argIndex), + sizeof(CLArgSampler), &CLArgSampler); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp new file mode 100644 index 0000000000..50f48b41c8 --- /dev/null +++ b/source/adapters/opencl/kernel.hpp @@ -0,0 +1,83 @@ +//===--------- kernel.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" +#include "program.hpp" + +#include + +struct ur_kernel_handle_t_ { + using native_type = cl_kernel; + native_type Kernel; + ur_program_handle_t Program; + ur_context_handle_t Context; + std::atomic RefCount = 0; + + ur_kernel_handle_t_(native_type Kernel, ur_program_handle_t Program, + ur_context_handle_t Context) + : Kernel(Kernel), Program(Program), Context(Context) { + RefCount = 1; + urProgramRetain(Program); + urContextRetain(Context); + } + + ~ur_kernel_handle_t_() { + clReleaseKernel(Kernel); + urProgramRelease(Program); + urContextRelease(Context); + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + static ur_result_t makeWithNative(native_type NativeKernel, + ur_program_handle_t Program, + ur_context_handle_t Context, + ur_kernel_handle_t &Kernel) { + if (!Program || !Context) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + cl_program CLProgram; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, + sizeof(CLProgram), &CLProgram, + nullptr)); + + if (Context->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + if (Program->get() != CLProgram) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + auto URKernel = + std::make_unique(NativeKernel, Program, Context); + Kernel = URKernel.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; + } + + native_type get() { return Kernel; } + + ur_platform_handle_t getPlatform() { return Context->Devices[0]->Platform; } +}; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 2397e2b5f9..c16f2d6b58 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -8,7 +8,10 @@ // //===----------------------------------------------------------------------===// +#include "memory.hpp" #include "common.hpp" +#include "context.hpp" +#include "platform.hpp" cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_format CLImageFormat; @@ -224,20 +227,21 @@ cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { - cl_int RetErr = CL_INVALID_OPERATION; if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported - clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + // First we need to look up the function pointer - RetErr = - cl_ext::getExtFuncFromContext( - CLContext, - cl_ext::ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, - cl_ext::CreateBufferWithPropertiesName, &FuncPtr); - if (FuncPtr) { + cl_context CLContext = hContext->get(); + ur_platform_handle_t Platform = hContext->getPlatform(); + clCreateBufferWithPropertiesINTEL_fn clCreateBufferWithProperties = + Platform->ExtFuncPtr->clCreateBufferWithPropertiesINTELCache; + ur_result_t Res = Platform->getExtFunc( + &clCreateBufferWithProperties, cl_ext::CreateBufferWithPropertiesName, + "cl_intel_create_buffer_with_properties"); + + if (Res == UR_RESULT_SUCCESS) { std::vector PropertiesIntel; auto Prop = static_cast(pProperties->pNext); while (Prop) { @@ -261,18 +265,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( } PropertiesIntel.push_back(0); - *phBuffer = reinterpret_cast(FuncPtr( - CLContext, PropertiesIntel.data(), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&RetErr))); + try { + cl_mem Buffer = clCreateBufferWithProperties( + CLContext, PropertiesIntel.data(), static_cast(flags), + size, pProperties->pHost, cl_adapter::cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hContext); + *phBuffer = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return mapCLErrorToUR(RetErr); } } void *HostPtr = pProperties ? pProperties->pHost : nullptr; - *phBuffer = reinterpret_cast(clCreateBuffer( - cl_adapter::cast(hContext), static_cast(flags), - size, HostPtr, cl_adapter::cast(&RetErr))); - CL_RETURN_ON_FAILURE(RetErr); + try { + cl_mem Buffer = + clCreateBuffer(hContext->get(), static_cast(flags), size, + HostPtr, cl_adapter::cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hContext); + *phBuffer = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -288,10 +309,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( cl_image_desc ImageDesc = mapURImageDescToCL(pImageDesc); cl_map_flags MapFlags = convertURMemFlagsToCL(flags); - *phMem = reinterpret_cast(clCreateImage( - cl_adapter::cast(hContext), MapFlags, &ImageFormat, - &ImageDesc, pHost, cl_adapter::cast(&RetErr))); - CL_RETURN_ON_FAILURE(RetErr); + try { + cl_mem Mem = + clCreateImage(hContext->get(), MapFlags, &ImageFormat, &ImageDesc, + pHost, cl_adapter::cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Mem, hContext); + *phMem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -315,47 +344,56 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( _cl_buffer_region BufferRegion; BufferRegion.origin = pRegion->origin; BufferRegion.size = pRegion->size; - - *phMem = reinterpret_cast(clCreateSubBuffer( - cl_adapter::cast(hBuffer), static_cast(flags), - BufferCreateType, &BufferRegion, cl_adapter::cast(&RetErr))); - - if (RetErr == CL_INVALID_VALUE) { - size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hBuffer), - CL_MEM_SIZE, sizeof(BufferSize), - &BufferSize, nullptr)); - if (BufferRegion.size + BufferRegion.origin > BufferSize) - return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + try { + cl_mem Buffer = clCreateSubBuffer( + hBuffer->get(), static_cast(flags), BufferCreateType, + &BufferRegion, cl_adapter::cast(&RetErr)); + if (RetErr == CL_INVALID_VALUE) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(hBuffer->get(), CL_MEM_SIZE, + sizeof(BufferSize), &BufferSize, + nullptr)); + if (BufferRegion.size + BufferRegion.origin > BufferSize) + return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + } + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hBuffer->Context); + *phMem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } return mapCLErrorToUR(RetErr); } UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( ur_mem_handle_t hMem, ur_device_handle_t, ur_native_handle_t *phNativeMem) { - return getNativeHandle(hMem, phNativeMem); + return getNativeHandle(hMem->get(), phNativeMem); } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( - ur_native_handle_t hNativeMem, - [[maybe_unused]] ur_context_handle_t hContext, + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - *phMem = reinterpret_cast(hNativeMem); + cl_mem NativeHandle = reinterpret_cast(hNativeMem); + UR_RETURN_ON_FAILURE( + ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); if (!pProperties || !pProperties->isNativeHandleOwned) { - return urMemRetain(*phMem); + CL_RETURN_ON_FAILURE(clRetainMemObject((*phMem)->get())); } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( - ur_native_handle_t hNativeMem, - [[maybe_unused]] ur_context_handle_t hContext, + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, [[maybe_unused]] const ur_image_format_t *pImageFormat, [[maybe_unused]] const ur_image_desc_t *pImageDesc, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - *phMem = reinterpret_cast(hNativeMem); + cl_mem NativeHandle = reinterpret_cast(hNativeMem); + UR_RETURN_ON_FAILURE( + ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); if (!pProperties || !pProperties->isNativeHandleOwned) { - return urMemRetain(*phMem); + CL_RETURN_ON_FAILURE(clRetainMemObject(NativeHandle)); } return UR_RESULT_SUCCESS; } @@ -369,17 +407,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemInfoToCL(propName); - size_t CheckPropSize = 0; - auto ClResult = - clGetMemObjectInfo(cl_adapter::cast(hMemory), CLPropName, - propSize, pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + switch (static_cast(propName)) { + case UR_MEM_INFO_CONTEXT: { + return ReturnValue(hMemory->Context); + } + default: { + size_t CheckPropSize = 0; + auto ClResult = clGetMemObjectInfo(hMemory->get(), CLPropName, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; } + return UR_RESULT_SUCCESS; } @@ -393,8 +438,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, const cl_int CLPropName = mapURMemImageInfoToCL(propName); size_t CheckPropSize = 0; - auto ClResult = clGetImageInfo(cl_adapter::cast(hMemory), CLPropName, - propSize, pPropValue, &CheckPropSize); + auto ClResult = clGetImageInfo(hMemory->get(), CLPropName, propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -406,11 +451,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, } UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { - CL_RETURN_ON_FAILURE(clRetainMemObject(cl_adapter::cast(hMem))); + CL_RETURN_ON_FAILURE(clRetainMemObject(hMem->get())); + hMem->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { - CL_RETURN_ON_FAILURE(clReleaseMemObject(cl_adapter::cast(hMem))); + if (hMem->decrementReferenceCount() == 0) { + delete hMem; + } else { + CL_RETURN_ON_FAILURE(clReleaseMemObject(hMem->get())); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp new file mode 100644 index 0000000000..df8794c897 --- /dev/null +++ b/source/adapters/opencl/memory.hpp @@ -0,0 +1,66 @@ +//===--------- memory.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" + +#include + +struct ur_mem_handle_t_ { + using native_type = cl_mem; + native_type Memory; + ur_context_handle_t Context; + std::atomic RefCount = 0; + + ur_mem_handle_t_(native_type Mem, ur_context_handle_t Ctx) + : Memory(Mem), Context(Ctx) { + RefCount = 1; + urContextRetain(Context); + } + + ~ur_mem_handle_t_() { + clReleaseMemObject(Memory); + urContextRelease(Context); + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + static ur_result_t makeWithNative(native_type NativeMem, + ur_context_handle_t Ctx, + ur_mem_handle_t &Mem) { + if (!Ctx) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo( + NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + + if (Ctx->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + auto URMem = std::make_unique(NativeMem, Ctx); + Mem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; + } + + native_type get() { return Memory; } +}; diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 7188a3e8f0..7460491eb8 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -10,25 +10,6 @@ #include "platform.hpp" -ur_result_t cl_adapter::getPlatformVersion(cl_platform_id Plat, - oclv::OpenCLVersion &Version) { - - size_t PlatVerSize = 0; - CL_RETURN_ON_FAILURE( - clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); - - std::string PlatVer(PlatVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, PlatVerSize, - PlatVer.data(), nullptr)); - - Version = oclv::OpenCLVersion(PlatVer); - if (!Version.isValid()) { - return UR_RESULT_ERROR_INVALID_PLATFORM; - } - - return UR_RESULT_SUCCESS; -} - static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { switch (URPropName) { @@ -62,9 +43,13 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, case UR_PLATFORM_INFO_VERSION: case UR_PLATFORM_INFO_EXTENSIONS: case UR_PLATFORM_INFO_PROFILE: { + cl_platform_id Plat = nullptr; + if (hPlatform) { + Plat = hPlatform->get(); + } CL_RETURN_ON_FAILURE( - clGetPlatformInfo(cl_adapter::cast(hPlatform), - CLPropName, propSize, pPropValue, pSizeRet)); + clGetPlatformInfo(Plat, CLPropName, propSize, pPropValue, pSizeRet)); + return UR_RESULT_SUCCESS; } default: @@ -83,10 +68,38 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - cl_int Result = - clGetPlatformIDs(cl_adapter::cast(NumEntries), - cl_adapter::cast(phPlatforms), - cl_adapter::cast(pNumPlatforms)); + static std::vector> URPlatforms; + static std::once_flag InitFlag; + static uint32_t NumPlatforms = 0; + cl_int Result = CL_SUCCESS; + + std::call_once( + InitFlag, + [](cl_int &Result) { + Result = clGetPlatformIDs(0, nullptr, &NumPlatforms); + if (Result != CL_SUCCESS) { + return Result; + } + std::vector CLPlatforms(NumPlatforms); + Result = clGetPlatformIDs(cl_adapter::cast(NumPlatforms), + CLPlatforms.data(), nullptr); + if (Result != CL_SUCCESS) { + return Result; + } + try { + for (uint32_t i = 0; i < NumPlatforms; i++) { + auto URPlatform = + std::make_unique(CLPlatforms[i]); + URPlatforms.emplace_back(URPlatform.release()); + } + } catch (std::bad_alloc &) { + return CL_OUT_OF_RESOURCES; + } catch (...) { + return CL_INVALID_PLATFORM; + } + return Result; + }, + Result); /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ if (Result == CL_PLATFORM_NOT_FOUND_KHR) { @@ -95,21 +108,42 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, *pNumPlatforms = 0; } } - + if (pNumPlatforms != nullptr) { + *pNumPlatforms = NumPlatforms; + } + if (NumEntries && phPlatforms) { + for (uint32_t i = 0; i < NumEntries; i++) { + phPlatforms[i] = &(*URPlatforms[i]); + } + } return mapCLErrorToUR(Result); } UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { - *phNativePlatform = reinterpret_cast(hPlatform); + *phNativePlatform = reinterpret_cast(hPlatform->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( ur_native_handle_t hNativePlatform, const ur_platform_native_properties_t *, ur_platform_handle_t *phPlatform) { - *phPlatform = reinterpret_cast(hNativePlatform); - return UR_RESULT_SUCCESS; + cl_platform_id NativeHandle = + reinterpret_cast(hNativePlatform); + + uint32_t NumPlatforms = 0; + UR_RETURN_ON_FAILURE(urPlatformGet(nullptr, 0, 0, nullptr, &NumPlatforms)); + std::vector Platforms(NumPlatforms); + UR_RETURN_ON_FAILURE( + urPlatformGet(nullptr, 0, NumPlatforms, Platforms.data(), nullptr)); + + for (uint32_t i = 0; i < NumPlatforms; i++) { + if (Platforms[i]->get() == NativeHandle) { + *phPlatform = Platforms[i]; + return UR_RESULT_SUCCESS; + } + } + return UR_RESULT_ERROR_INVALID_PLATFORM; } // Returns plugin specific backend option. diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 31fd69de14..f1e1fe0c9e 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -10,8 +10,144 @@ #pragma once #include "common.hpp" +#include "device.hpp" -namespace cl_adapter { -ur_result_t getPlatformVersion(cl_platform_id Plat, - oclv::OpenCLVersion &Version); -} +#include + +using namespace cl_ext; + +struct ur_platform_handle_t_ { + using native_type = cl_platform_id; + native_type Platform = nullptr; + std::vector> Devices; + + ur_platform_handle_t_(native_type Plat) : Platform(Plat) { + ExtFuncPtr = std::make_unique(); + } + + ~ur_platform_handle_t_() { + for (auto &Dev : Devices) { + Dev.reset(); + } + Devices.clear(); + ExtFuncPtr.reset(); + } + + native_type get() { return Platform; } + + ur_result_t InitDevices() { + if (Devices.empty()) { + cl_uint DeviceNum = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &DeviceNum)); + + std::vector CLDevices(DeviceNum); + CL_RETURN_ON_FAILURE(clGetDeviceIDs( + Platform, CL_DEVICE_TYPE_ALL, DeviceNum, CLDevices.data(), nullptr)); + + try { + Devices.resize(DeviceNum); + for (size_t i = 0; i < DeviceNum; i++) { + Devices[i] = std::make_unique(CLDevices[i], this, + nullptr); + } + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } + + return UR_RESULT_SUCCESS; + } + + ur_result_t getPlatformVersion(oclv::OpenCLVersion &Version) { + size_t PlatVerSize = 0; + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, 0, + nullptr, &PlatVerSize)); + + std::string PlatVer(PlatVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetPlatformInfo( + Platform, CL_PLATFORM_VERSION, PlatVerSize, PlatVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(PlatVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_PLATFORM; + } + + return UR_RESULT_SUCCESS; + } + + ur_result_t checkPlatformExtensions(const std::vector &Exts, + bool &Supported) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_EXTENSIONS, 0, + nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_EXTENSIONS, + ExtSize, ExtStr.data(), nullptr)); + + Supported = true; + for (const std::string &Ext : Exts) { + if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + break; + } + } + + return UR_RESULT_SUCCESS; + } + + struct ExtFuncPtrT { + clHostMemAllocINTEL_fn clHostMemAllocINTELCache = nullptr; + clDeviceMemAllocINTEL_fn clDeviceMemAllocINTELCache = nullptr; + clSharedMemAllocINTEL_fn clSharedMemAllocINTELCache = nullptr; + clGetDeviceFunctionPointer_fn clGetDeviceFunctionPointerCache = nullptr; + clCreateBufferWithPropertiesINTEL_fn + clCreateBufferWithPropertiesINTELCache = nullptr; + clMemBlockingFreeINTEL_fn clMemBlockingFreeINTELCache = nullptr; + clSetKernelArgMemPointerINTEL_fn clSetKernelArgMemPointerINTELCache = + nullptr; + clEnqueueMemFillINTEL_fn clEnqueueMemFillINTELCache = nullptr; + clEnqueueMemcpyINTEL_fn clEnqueueMemcpyINTELCache = nullptr; + clGetMemAllocInfoINTEL_fn clGetMemAllocInfoINTELCache = nullptr; + clEnqueueWriteGlobalVariable_fn clEnqueueWriteGlobalVariableCache = nullptr; + clEnqueueReadGlobalVariable_fn clEnqueueReadGlobalVariableCache = nullptr; + clEnqueueReadHostPipeINTEL_fn clEnqueueReadHostPipeINTELCache = nullptr; + clEnqueueWriteHostPipeINTEL_fn clEnqueueWriteHostPipeINTELCache = nullptr; + clSetProgramSpecializationConstant_fn + clSetProgramSpecializationConstantCache = nullptr; + clCreateCommandBufferKHR_fn clCreateCommandBufferKHRCache = nullptr; + clRetainCommandBufferKHR_fn clRetainCommandBufferKHRCache = nullptr; + clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHRCache = nullptr; + clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHRCache = nullptr; + clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHRCache = nullptr; + clCommandCopyBufferKHR_fn clCommandCopyBufferKHRCache = nullptr; + clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHRCache = nullptr; + clCommandFillBufferKHR_fn clCommandFillBufferKHRCache = nullptr; + clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHRCache = nullptr; + clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHRCache = nullptr; + }; + + std::unique_ptr ExtFuncPtr; + template + ur_result_t getExtFunc(T *CachedExtFunc, const char *FuncName, + const char *Extension) { + if (!(*CachedExtFunc)) { + // Check that the function ext is supported by the platform. + bool Supported = false; + UR_RETURN_ON_FAILURE(checkPlatformExtensions({Extension}, Supported)); + if (!Supported) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + *CachedExtFunc = reinterpret_cast( + clGetExtensionFunctionAddressForPlatform(Platform, FuncName)); + if (!(*CachedExtFunc)) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + } + return UR_RESULT_SUCCESS; + } +}; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index d76cd0b768..21b00b4add 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -8,60 +8,36 @@ // //===----------------------------------------------------------------------===// +#include "program.hpp" #include "common.hpp" #include "context.hpp" #include "device.hpp" #include "platform.hpp" -static ur_result_t getDevicesFromProgram( - ur_program_handle_t hProgram, - std::unique_ptr> &DevicesInProgram) { - - cl_uint DeviceCount; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), - &DeviceCount, nullptr)); - - if (DeviceCount < 1) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - DevicesInProgram = std::make_unique>(DeviceCount); - - CL_RETURN_ON_FAILURE(clGetProgramInfo( - cl_adapter::cast(hProgram), CL_PROGRAM_DEVICES, - DeviceCount * sizeof(cl_device_id), (*DevicesInProgram).data(), nullptr)); - - return UR_RESULT_SUCCESS; -} +#include UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( ur_context_handle_t hContext, const void *pIL, size_t length, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - std::unique_ptr> DevicesInCtx; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getDevicesFromContext(hContext, DevicesInCtx), phProgram); - - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, nullptr), - phProgram); + if (!hContext->DeviceCount || !hContext->Devices[0]->Platform) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + ur_platform_handle_t CurPlatform = hContext->Devices[0]->Platform; oclv::OpenCLVersion PlatVer; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getPlatformVersion(CurPlatform, PlatVer), phProgram); + CL_RETURN_ON_FAILURE_AND_SET_NULL(CurPlatform->getPlatformVersion(PlatVer), + phProgram); cl_int Err = CL_SUCCESS; if (PlatVer >= oclv::V2_1) { /* Make sure all devices support CL 2.1 or newer as well. */ - for (cl_device_id Dev : *DevicesInCtx) { + for (ur_device_handle_t URDev : hContext->getDevices()) { oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getDeviceVersion(Dev, DevVer), phProgram); + CL_RETURN_ON_FAILURE_AND_SET_NULL(URDev->getDeviceVersion(DevVer), + phProgram); /* If the device does not support CL 2.1 or greater, we need to make sure * it supports the cl_khr_il_program extension. @@ -69,8 +45,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( if (DevVer < oclv::V2_1) { bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, - Supported), + URDev->checkDeviceExtensions({"cl_khr_il_program"}, Supported), phProgram); if (!Supported) { @@ -79,19 +54,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } } - *phProgram = cl_adapter::cast(clCreateProgramWithIL( - cl_adapter::cast(hContext), pIL, length, &Err)); + cl_program Program = + clCreateProgramWithIL(hContext->get(), pIL, length, &Err); CL_RETURN_ON_FAILURE(Err); + try { + auto URProgram = + std::make_unique(Program, hContext); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } else { - /* If none of the devices conform with CL 2.1 or newer make sure they all * support the cl_khr_il_program extension. */ - for (cl_device_id Dev : *DevicesInCtx) { + for (ur_device_handle_t URDev : hContext->getDevices()) { bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, - Supported), + URDev->checkDeviceExtensions({"cl_khr_il_program"}, Supported), phProgram); if (!Supported) { @@ -103,12 +85,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); ApiFuncT FuncPtr = reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - CurPlatform, "clCreateProgramWithILKHR")); + CurPlatform->get(), "clCreateProgramWithILKHR")); assert(FuncPtr != nullptr); - - *phProgram = cl_adapter::cast( - FuncPtr(cl_adapter::cast(hContext), pIL, length, &Err)); + try { + cl_program Program = FuncPtr(hContext->get(), pIL, length, &Err); + CL_RETURN_ON_FAILURE(Err); + auto URProgram = + std::make_unique(Program, hContext); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } CL_RETURN_ON_FAILURE(Err); } @@ -120,13 +110,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( const uint8_t *pBinary, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - const cl_device_id Devices[1] = {cl_adapter::cast(hDevice)}; + const cl_device_id Devices[1] = {hDevice->get()}; const size_t Lengths[1] = {size}; cl_int BinaryStatus[1]; cl_int CLResult; - *phProgram = cl_adapter::cast(clCreateProgramWithBinary( - cl_adapter::cast(hContext), cl_adapter::cast(1u), - Devices, Lengths, &pBinary, BinaryStatus, &CLResult)); + try { + cl_program Program = clCreateProgramWithBinary( + hContext->get(), cl_adapter::cast(1u), Devices, Lengths, + &pBinary, BinaryStatus, &CLResult); + CL_RETURN_ON_FAILURE(CLResult); + auto URProgram = std::make_unique(Program, hContext); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } CL_RETURN_ON_FAILURE(BinaryStatus[0]); CL_RETURN_ON_FAILURE(CLResult); @@ -137,12 +136,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - std::unique_ptr> DevicesInProgram; - CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + uint32_t DeviceCount = hProgram->Context->DeviceCount; + std::vector CLDevicesInProgram(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + CLDevicesInProgram[i] = hProgram->Context->Devices[i]->get(); + } - CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), - DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, 0, + CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->get(), DeviceCount, + CLDevicesInProgram.data(), pOptions, 0, nullptr, nullptr, nullptr, nullptr)); return UR_RESULT_SUCCESS; @@ -177,17 +178,42 @@ static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - size_t CheckPropSize = 0; - auto ClResult = clGetProgramInfo(cl_adapter::cast(hProgram), - mapURProgramInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + const cl_program_info CLPropName = mapURProgramInfoToCL(propName); + + switch (static_cast(propName)) { + case UR_PROGRAM_INFO_CONTEXT: { + return ReturnValue(hProgram->Context); } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + case UR_PROGRAM_INFO_NUM_DEVICES: { + if (!hProgram->Context || !hProgram->Context->DeviceCount) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + cl_uint DeviceCount = hProgram->Context->DeviceCount; + return ReturnValue(DeviceCount); + } + case UR_PROGRAM_INFO_DEVICES: { + return ReturnValue(&hProgram->Context->Devices[0], + hProgram->Context->DeviceCount); + } + case UR_PROGRAM_INFO_REFERENCE_COUNT: { + return ReturnValue(hProgram->getReferenceCount()); } + default: { + size_t CheckPropSize = 0; + auto ClResult = clGetProgramInfo(hProgram->get(), CLPropName, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + } + } + return UR_RESULT_SUCCESS; } @@ -195,12 +221,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - std::unique_ptr> DevicesInProgram; - CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - - CL_RETURN_ON_FAILURE(clBuildProgram( - cl_adapter::cast(hProgram), DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, nullptr, nullptr)); + uint32_t DeviceCount = hProgram->Context->DeviceCount; + std::vector CLDevicesInProgram(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + CLDevicesInProgram[i] = hProgram->Context->Devices[i]->get(); + } + CL_RETURN_ON_FAILURE( + clBuildProgram(hProgram->get(), cl_adapter::cast(DeviceCount), + CLDevicesInProgram.data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -210,12 +238,22 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, ur_program_handle_t *phProgram) { cl_int CLResult; - *phProgram = cl_adapter::cast( - clLinkProgram(cl_adapter::cast(hContext), 0, nullptr, - pOptions, cl_adapter::cast(count), - cl_adapter::cast(phPrograms), nullptr, - nullptr, &CLResult)); + std::vector CLPrograms(count); + for (uint32_t i = 0; i < count; i++) { + CLPrograms[i] = phPrograms[i]->get(); + } + cl_program Program = clLinkProgram( + hContext->get(), 0, nullptr, pOptions, cl_adapter::cast(count), + CLPrograms.data(), nullptr, nullptr, &CLResult); CL_RETURN_ON_FAILURE(CLResult); + try { + auto URProgram = std::make_unique(Program, hContext); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -287,15 +325,12 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( - cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), - mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), - &BinaryType, nullptr)); + hProgram->get(), hDevice->get(), mapURProgramBuildInfoToCL(propName), + sizeof(cl_program_binary_type), &BinaryType, nullptr)); return ReturnValue(mapCLBinaryTypeToUR(BinaryType)); } size_t CheckPropSize = 0; - cl_int ClErr = clGetProgramBuildInfo(cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), + cl_int ClErr = clGetProgramBuildInfo(hProgram->get(), hDevice->get(), mapURProgramBuildInfoToCL(propName), propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { @@ -311,33 +346,38 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain(ur_program_handle_t hProgram) { - - CL_RETURN_ON_FAILURE(clRetainProgram(cl_adapter::cast(hProgram))); + CL_RETURN_ON_FAILURE(clRetainProgram(hProgram->get())); + hProgram->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { - - CL_RETURN_ON_FAILURE( - clReleaseProgram(cl_adapter::cast(hProgram))); + if (hProgram->decrementReferenceCount() == 0) { + delete hProgram; + } else { + CL_RETURN_ON_FAILURE(clReleaseProgram(hProgram->get())); + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { - *phNativeProgram = reinterpret_cast(hProgram); + *phNativeProgram = reinterpret_cast(hProgram->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( - ur_native_handle_t hNativeProgram, ur_context_handle_t, + ur_native_handle_t hNativeProgram, ur_context_handle_t hContext, const ur_program_native_properties_t *pProperties, ur_program_handle_t *phProgram) { - *phProgram = reinterpret_cast(hNativeProgram); + cl_program NativeHandle = reinterpret_cast(hNativeProgram); + + UR_RETURN_ON_FAILURE( + ur_program_handle_t_::makeWithNative(NativeHandle, hContext, *phProgram)); if (!pProperties || !pProperties->isNativeHandleOwned) { - return urProgramRetain(*phProgram); + CL_RETURN_ON_FAILURE(clRetainProgram(NativeHandle)); } return UR_RESULT_SUCCESS; } @@ -346,33 +386,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( ur_program_handle_t hProgram, uint32_t count, const ur_specialization_constant_info_t *pSpecConstants) { - cl_program CLProg = cl_adapter::cast(hProgram); - cl_context Ctx = nullptr; - size_t RetSize = 0; - - CL_RETURN_ON_FAILURE(clGetProgramInfo(CLProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), - &Ctx, &RetSize)); - - std::unique_ptr> DevicesInCtx; - UR_RETURN_ON_FAILURE(cl_adapter::getDevicesFromContext( - cl_adapter::cast(Ctx), DevicesInCtx)); + cl_program CLProg = hProgram->get(); + if (!hProgram->Context) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + ur_context_handle_t Ctx = hProgram->Context; + if (!Ctx->DeviceCount || !Ctx->Devices[0]->Platform) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE(clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, - nullptr)); + ur_platform_handle_t CurPlatform = Ctx->Devices[0]->Platform; oclv::OpenCLVersion PlatVer; - cl_adapter::getPlatformVersion(CurPlatform, PlatVer); + CurPlatform->getPlatformVersion(PlatVer); bool UseExtensionLookup = false; if (PlatVer < oclv::V2_2) { UseExtensionLookup = true; } else { - for (cl_device_id Dev : *DevicesInCtx) { + for (ur_device_handle_t Dev : Ctx->getDevices()) { oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(Dev, DevVer)); + UR_RETURN_ON_FAILURE(Dev->getDeviceVersion(DevVer)); if (DevVer < oclv::V2_2) { UseExtensionLookup = true; @@ -389,16 +424,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( } } else { cl_ext::clSetProgramSpecializationConstant_fn - SetProgramSpecializationConstant = nullptr; - const ur_result_t URResult = cl_ext::getExtFuncFromContext< - decltype(SetProgramSpecializationConstant)>( - Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, - cl_ext::SetProgramSpecializationConstantName, - &SetProgramSpecializationConstant); - - if (URResult != UR_RESULT_SUCCESS) { - return URResult; - } + SetProgramSpecializationConstant = + CurPlatform->ExtFuncPtr->clSetProgramSpecializationConstantCache; + UR_RETURN_ON_FAILURE(CurPlatform->getExtFunc( + &SetProgramSpecializationConstant, + cl_ext::SetProgramSpecializationConstantName, "")); for (uint32_t i = 0; i < count; ++i) { CL_RETURN_ON_FAILURE(SetProgramSpecializationConstant( @@ -439,19 +469,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ur_device_handle_t hDevice, ur_program_handle_t hProgram, const char *pFunctionName, void **ppFunctionPointer) { - cl_context CLContext = nullptr; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_CONTEXT, sizeof(CLContext), - &CLContext, nullptr)); - - cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; - - UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, - cl_ext::GetDeviceFunctionPointerName, &FuncT)); + ur_platform_handle_t Platform = hDevice->Platform; + cl_ext::clGetDeviceFunctionPointer_fn clGetDeviceFunctionPointer = + Platform->ExtFuncPtr->clGetDeviceFunctionPointerCache; + ur_result_t Res = Platform->getExtFunc( + &clGetDeviceFunctionPointer, cl_ext::GetDeviceFunctionPointerName, ""); - if (!FuncT) { + if (Res != UR_RESULT_SUCCESS) { return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; } @@ -462,15 +486,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( // throws exceptions. *ppFunctionPointer = 0; size_t Size; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_KERNEL_NAMES, 0, nullptr, - &Size)); + CL_RETURN_ON_FAILURE(clGetProgramInfo( + hProgram->get(), CL_PROGRAM_KERNEL_NAMES, 0, nullptr, &Size)); std::string KernelNames(Size, ' '); - CL_RETURN_ON_FAILURE(clGetProgramInfo( - cl_adapter::cast(hProgram), CL_PROGRAM_KERNEL_NAMES, - KernelNames.size(), &KernelNames[0], nullptr)); + CL_RETURN_ON_FAILURE( + clGetProgramInfo(hProgram->get(), CL_PROGRAM_KERNEL_NAMES, + KernelNames.size(), &KernelNames[0], nullptr)); // Get rid of the null terminator and search for the kernel name. If the // function cannot be found, return an error code to indicate it exists. @@ -479,10 +502,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( return UR_RESULT_ERROR_INVALID_KERNEL_NAME; } - const cl_int CLResult = - FuncT(cl_adapter::cast(hDevice), - cl_adapter::cast(hProgram), pFunctionName, - reinterpret_cast(ppFunctionPointer)); + const cl_int CLResult = clGetDeviceFunctionPointer( + hDevice->get(), hProgram->get(), pFunctionName, + reinterpret_cast(ppFunctionPointer)); // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address // cannot be found but the kernel exists. As the kernel does exist, return // that the function name is invalid. diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp new file mode 100644 index 0000000000..1c6bae2e8d --- /dev/null +++ b/source/adapters/opencl/program.hpp @@ -0,0 +1,67 @@ +//===--------- program.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" + +#include + +struct ur_program_handle_t_ { + using native_type = cl_program; + native_type Program; + ur_context_handle_t Context; + std::atomic RefCount = 0; + + ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx) + : Program(Prog), Context(Ctx) { + RefCount = 1; + urContextRetain(Context); + } + + ~ur_program_handle_t_() { + clReleaseProgram(Program); + urContextRelease(Context); + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + static ur_result_t makeWithNative(native_type NativeProg, + ur_context_handle_t Context, + ur_program_handle_t &Program) { + if (!Context) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + if (Context->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + auto URProgram = + std::make_unique(NativeProg, Context); + Program = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; + } + + native_type get() { return Program; } +}; diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 4a39a91ef5..2cbeb6f738 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -6,7 +6,10 @@ // //===-----------------------------------------------------------------===// +#include "queue.hpp" #include "common.hpp" +#include "context.hpp" +#include "device.hpp" #include "platform.hpp" cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { @@ -71,12 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - clGetDeviceInfo(cl_adapter::cast(hDevice), - CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &CurPlatform, - nullptr), - phQueue); + ur_platform_handle_t CurPlatform = hDevice->Platform; cl_command_queue_properties CLProperties = pProperties ? convertURQueuePropertiesToCL(pProperties) : 0; @@ -87,29 +85,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; oclv::OpenCLVersion Version; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getPlatformVersion(CurPlatform, Version), phQueue); + CL_RETURN_ON_FAILURE_AND_SET_NULL(CurPlatform->getPlatformVersion(Version), + phQueue); cl_int RetErr = CL_INVALID_OPERATION; if (Version < oclv::V2_0) { - *phQueue = cl_adapter::cast( - clCreateCommandQueue(cl_adapter::cast(hContext), - cl_adapter::cast(hDevice), - CLProperties & SupportByOpenCL, &RetErr)); + cl_command_queue Queue = + clCreateCommandQueue(hContext->get(), hDevice->get(), + CLProperties & SupportByOpenCL, &RetErr); CL_RETURN_ON_FAILURE(RetErr); + try { + auto URQueue = + std::make_unique(Queue, hContext, hDevice); + *phQueue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; } /* TODO: Add support for CL_QUEUE_PRIORITY_KHR */ cl_queue_properties CreationFlagProperties[] = { CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; - *phQueue = - cl_adapter::cast(clCreateCommandQueueWithProperties( - cl_adapter::cast(hContext), - cl_adapter::cast(hDevice), CreationFlagProperties, - &RetErr)); + cl_command_queue Queue = clCreateCommandQueueWithProperties( + hContext->get(), hDevice->get(), CreationFlagProperties, &RetErr); CL_RETURN_ON_FAILURE(RetErr); + try { + auto URQueue = + std::make_unique(Queue, hContext, hDevice); + *phQueue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -123,23 +136,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); - + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_QUEUE_INFO_CONTEXT: { + return ReturnValue(hQueue->Context); + } + case UR_QUEUE_INFO_DEVICE: { + return ReturnValue(hQueue->Device); + } // Unfortunately the size of cl_bitfield (unsigned long) doesn't line up with // our enums (forced to be sizeof(uint32_t)) so this needs special handling. - if (propName == UR_QUEUE_INFO_FLAGS) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - + case UR_QUEUE_INFO_FLAGS: { cl_command_queue_properties QueueProperties = 0; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CLCommandQueueInfo, - sizeof(QueueProperties), &QueueProperties, nullptr)); + hQueue->get(), CLCommandQueueInfo, sizeof(QueueProperties), + &QueueProperties, nullptr)); return ReturnValue(mapCLQueuePropsToUR(QueueProperties)); - } else { + } + case UR_QUEUE_INFO_REFERENCE_COUNT: { + return ReturnValue(hQueue->getReferenceCount()); + } + default: { size_t CheckPropSize = 0; - cl_int RetErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CLCommandQueueInfo, - propSize, pPropValue, &CheckPropSize); + cl_int RetErr = clGetCommandQueueInfo(hQueue->get(), CLCommandQueueInfo, + propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -148,6 +169,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, *pPropSizeRet = CheckPropSize; } } + } return UR_RESULT_SUCCESS; } @@ -155,45 +177,50 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, ur_native_handle_t *phNativeQueue) { - return getNativeHandle(hQueue, phNativeQueue); + return getNativeHandle(hQueue->get(), phNativeQueue); } UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( - ur_native_handle_t hNativeQueue, - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, - [[maybe_unused]] const ur_queue_native_properties_t *pProperties, + ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, + ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { - *phQueue = reinterpret_cast(hNativeQueue); - cl_int RetErr = - clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); - CL_RETURN_ON_FAILURE(RetErr); + cl_command_queue NativeHandle = + reinterpret_cast(hNativeQueue); + + UR_RETURN_ON_FAILURE(ur_queue_handle_t_::makeWithNative( + NativeHandle, hContext, hDevice, *phQueue)); + + if (!pProperties || !pProperties->isNativeHandleOwned) { + CL_RETURN_ON_FAILURE(clRetainCommandQueue(NativeHandle)); + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + cl_int RetErr = clFinish(hQueue->get()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { - cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + cl_int RetErr = clFinish(hQueue->get()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - cl_int RetErr = - clRetainCommandQueue(cl_adapter::cast(hQueue)); - CL_RETURN_ON_FAILURE(RetErr); + CL_RETURN_ON_FAILURE(clRetainCommandQueue(hQueue->get())); + hQueue->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - cl_int RetErr = - clReleaseCommandQueue(cl_adapter::cast(hQueue)); - CL_RETURN_ON_FAILURE(RetErr); + if (hQueue->decrementReferenceCount() == 0) { + delete hQueue; + } else { + CL_RETURN_ON_FAILURE(clReleaseCommandQueue(hQueue->get())); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp new file mode 100644 index 0000000000..e5723f3204 --- /dev/null +++ b/source/adapters/opencl/queue.hpp @@ -0,0 +1,80 @@ +//===--------- queue.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" +#include "device.hpp" + +#include + +struct ur_queue_handle_t_ { + using native_type = cl_command_queue; + native_type Queue; + ur_context_handle_t Context; + ur_device_handle_t Device; + std::atomic RefCount = 0; + + ur_queue_handle_t_(native_type Queue, ur_context_handle_t Ctx, + ur_device_handle_t Dev) + : Queue(Queue), Context(Ctx), Device(Dev) { + RefCount = 1; + urDeviceRetain(Device); + urContextRetain(Context); + } + + static ur_result_t makeWithNative(native_type NativeQueue, + ur_context_handle_t Context, + ur_device_handle_t Device, + ur_queue_handle_t &Queue) { + if (!Context || !Device) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(NativeQueue, CL_QUEUE_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + cl_device_id CLDevice; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + NativeQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); + if (Context->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + if (Device->get() != CLDevice) { + return UR_RESULT_ERROR_INVALID_DEVICE; + } + auto URQueue = + std::make_unique(NativeQueue, Context, Device); + Queue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; + } + + ~ur_queue_handle_t_() { + clReleaseCommandQueue(Queue); + urDeviceRelease(Device); + urContextRelease(Context); + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + native_type get() { return Queue; } + + ur_platform_handle_t getPlatform() { return Device->Platform; } +}; diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 49f31b37fd..d52c0d6f42 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -8,7 +8,9 @@ // //===----------------------------------------------------------------------===// +#include "sampler.hpp" #include "common.hpp" +#include "context.hpp" namespace { @@ -137,16 +139,23 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, ur_sampler_handle_t *phSampler) { // Initialize properties according to OpenCL 2.1 spec. - ur_result_t ErrorCode; + cl_int ErrorCode; cl_addressing_mode AddressingMode = ur2CLAddressingMode(pDesc->addressingMode); cl_filter_mode FilterMode = ur2CLFilterMode(pDesc->filterMode); - - // Always call OpenCL 1.0 API - *phSampler = cl_adapter::cast(clCreateSampler( - cl_adapter::cast(hContext), - static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, - cl_adapter::cast(&ErrorCode))); + try { + // Always call OpenCL 1.0 API + cl_sampler Sampler = clCreateSampler( + hContext->get(), static_cast(pDesc->normalizedCoords), + AddressingMode, FilterMode, &ErrorCode); + CL_RETURN_ON_FAILURE(ErrorCode); + auto URSampler = std::make_unique(Sampler, hContext); + *phSampler = URSampler.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return mapCLErrorToUR(ErrorCode); } @@ -158,51 +167,75 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, static_assert(sizeof(cl_addressing_mode) == sizeof(ur_sampler_addressing_mode_t)); - size_t CheckPropSize = 0; - ur_result_t Err = mapCLErrorToUR( - clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, - propSize, pPropValue, &CheckPropSize)); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_SAMPLER_INFO_CONTEXT: { + return ReturnValue(hSampler->Context); + } + case UR_SAMPLER_INFO_REFERENCE_COUNT: { + return ReturnValue(hSampler->getReferenceCount()); + } + default: { + size_t CheckPropSize = 0; + ur_result_t Err = mapCLErrorToUR(clGetSamplerInfo( + hSampler->get(), SamplerInfo, propSize, pPropValue, &CheckPropSize)); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(Err); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + + // Convert OpenCL returns to UR + cl2URSamplerInfoValue(SamplerInfo, pPropValue); } - CL_RETURN_ON_FAILURE(Err); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; } - - // Convert OpenCL returns to UR - cl2URSamplerInfoValue(SamplerInfo, pPropValue); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { - return mapCLErrorToUR( - clRetainSampler(cl_adapter::cast(hSampler))); + CL_RETURN_ON_FAILURE(clRetainSampler(hSampler->get())); + hSampler->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { - return mapCLErrorToUR( - clReleaseSampler(cl_adapter::cast(hSampler))); + if (hSampler->decrementReferenceCount() == 0) { + delete hSampler; + } else { + CL_RETURN_ON_FAILURE(clRetainSampler(hSampler->get())); + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { - *phNativeSampler = reinterpret_cast( - cl_adapter::cast(hSampler)); + *phNativeSampler = reinterpret_cast(hSampler->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( - ur_native_handle_t hNativeSampler, ur_context_handle_t, + ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { - *phSampler = reinterpret_cast( - cl_adapter::cast(hNativeSampler)); + cl_sampler NativeHandle = reinterpret_cast(hNativeSampler); + try { + auto URSampler = + std::make_unique(NativeHandle, hContext); + *phSampler = URSampler.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + if (!pProperties || !pProperties->isNativeHandleOwned) { - return urSamplerRetain(*phSampler); + CL_RETURN_ON_FAILURE(clRetainSampler(NativeHandle)); } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.hpp b/source/adapters/opencl/sampler.hpp new file mode 100644 index 0000000000..238ee1cecc --- /dev/null +++ b/source/adapters/opencl/sampler.hpp @@ -0,0 +1,40 @@ +//===--------- sampler.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_sampler_handle_t_ { + using native_type = cl_sampler; + native_type Sampler; + ur_context_handle_t Context; + std::atomic RefCount = 0; + + ur_sampler_handle_t_(native_type Sampler, ur_context_handle_t Ctx) + : Sampler(Sampler), Context(Ctx) { + RefCount = 1; + urContextRetain(Context); + } + + ~ur_sampler_handle_t_() { + clReleaseSampler(Sampler); + urContextRelease(Context); + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + + native_type get() { return Sampler; } +}; diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 0d64f23d13..57920e4674 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -9,6 +9,11 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include "context.hpp" +#include "device.hpp" +#include "event.hpp" +#include "platform.hpp" +#include "queue.hpp" inline cl_mem_alloc_flags_intel hostDescToClFlags(const ur_usm_host_desc_t &desc) { @@ -80,7 +85,7 @@ usmDescToCLMemProperties(const ur_base_desc_t *Desc, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, +urUSMHostAlloc(ur_context_handle_t Context, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { void *Ptr = nullptr; @@ -93,24 +98,22 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, } // First we need to look up the function pointer - clHostMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); - if (auto UrResult = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::HostMemAllocName, &FuncPtr)) { - return UrResult; - } - - if (FuncPtr) { - cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, - AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, &ClResult); - if (ClResult == CL_INVALID_BUFFER_SIZE) { - return UR_RESULT_ERROR_INVALID_USM_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); + cl_context CLContext = Context->get(); + ur_platform_handle_t Platform = Context->getPlatform(); + clHostMemAllocINTEL_fn clHostMemAlloc = + Platform->ExtFuncPtr->clHostMemAllocINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clHostMemAlloc, + cl_ext::HostMemAllocName, + "cl_intel_unified_shared_memory")); + + cl_int ClResult = CL_SUCCESS; + Ptr = clHostMemAlloc( + CLContext, AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; } + CL_RETURN_ON_FAILURE(ClResult); *ppMem = Ptr; @@ -122,7 +125,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, +urUSMDeviceAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -136,24 +139,23 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, } // First we need to look up the function pointer - clDeviceMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); - if (auto UrResult = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, - cl_ext::DeviceMemAllocName, &FuncPtr)) { - return UrResult; - } - - if (FuncPtr) { - cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, &ClResult); - if (ClResult == CL_INVALID_BUFFER_SIZE) { - return UR_RESULT_ERROR_INVALID_USM_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); + cl_context CLContext = Context->get(); + ur_platform_handle_t Platform = hDevice->Platform; + clDeviceMemAllocINTEL_fn clDeviceMemAlloc = + Platform->ExtFuncPtr->clDeviceMemAllocINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clDeviceMemAlloc, + cl_ext::DeviceMemAllocName, + "cl_intel_unified_shared_memory")); + + cl_int ClResult = CL_SUCCESS; + Ptr = clDeviceMemAlloc(CLContext, hDevice->get(), + AllocProperties.empty() ? nullptr + : AllocProperties.data(), + size, Alignment, &ClResult); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; } + CL_RETURN_ON_FAILURE(ClResult); *ppMem = Ptr; @@ -165,7 +167,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, +urUSMSharedAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -179,24 +181,23 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, } // First we need to look up the function pointer - clSharedMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); - if (auto UrResult = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, - cl_ext::SharedMemAllocName, &FuncPtr)) { - return UrResult; - } - - if (FuncPtr) { - cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, cl_adapter::cast(&ClResult)); - if (ClResult == CL_INVALID_BUFFER_SIZE) { - return UR_RESULT_ERROR_INVALID_USM_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); + cl_context CLContext = Context->get(); + ur_platform_handle_t Platform = hDevice->Platform; + clSharedMemAllocINTEL_fn clSharedMemAlloc = + Platform->ExtFuncPtr->clSharedMemAllocINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clSharedMemAlloc, + cl_ext::SharedMemAllocName, + "cl_intel_unified_shared_memory")); + + cl_int ClResult = CL_SUCCESS; + Ptr = clSharedMemAlloc( + CLContext, hDevice->get(), + AllocProperties.empty() ? nullptr : AllocProperties.data(), size, + Alignment, cl_adapter::cast(&ClResult)); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; } + CL_RETURN_ON_FAILURE(ClResult); *ppMem = Ptr; @@ -206,24 +207,20 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, +UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t Context, void *pMem) { // Use a blocking free to avoid issues with indirect access from kernels that // might be still running. - clMemBlockingFreeINTEL_fn FuncPtr = nullptr; - - cl_context CLContext = cl_adapter::cast(hContext); - ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, - cl_ext::MemBlockingFreeName, &FuncPtr); - - if (FuncPtr) { - RetVal = mapCLErrorToUR(FuncPtr(CLContext, pMem)); - } - - return RetVal; + cl_context CLContext = Context->get(); + ur_platform_handle_t Platform = Context->getPlatform(); + clMemBlockingFreeINTEL_fn clMemBlockingFree = + Platform->ExtFuncPtr->clMemBlockingFreeINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clMemBlockingFree, + cl_ext::MemBlockingFreeName, + "cl_intel_unified_shared_memory")); + + return mapCLErrorToUR(clMemBlockingFree(CLContext, pMem)); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( @@ -231,46 +228,55 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } - + cl_context CLContext = hQueue->Context->get(); + ur_platform_handle_t Platform = hQueue->Context->getPlatform(); if (patternSize <= 128) { - clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; + clEnqueueMemFillINTEL_fn EnqueueMemFill = + Platform->ExtFuncPtr->clEnqueueMemFillINTELCache; UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, - cl_ext::EnqueueMemFillName, &EnqueueMemFill)); - - CL_RETURN_ON_FAILURE( - EnqueueMemFill(cl_adapter::cast(hQueue), ptr, - pPattern, patternSize, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + Platform->getExtFunc(&EnqueueMemFill, cl_ext::EnqueueMemFillName, + "cl_intel_unified_shared_memory")); + + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(EnqueueMemFill(hQueue->get(), ptr, pPattern, + patternSize, size, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return UR_RESULT_SUCCESS; } // OpenCL only supports pattern sizes as large as the largest CL type // (double16/long16 - 128 bytes), anything larger we need to do on the host // side and copy it into the target allocation. - clHostMemAllocINTEL_fn HostMemAlloc = nullptr; - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::HostMemAllocName, &HostMemAlloc)); - - clEnqueueMemcpyINTEL_fn USMMemcpy = nullptr; - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - cl_ext::EnqueueMemcpyName, &USMMemcpy)); - - clMemBlockingFreeINTEL_fn USMFree = nullptr; - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, - cl_ext::MemBlockingFreeName, &USMFree)); + clHostMemAllocINTEL_fn HostMemAlloc = + Platform->ExtFuncPtr->clHostMemAllocINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&HostMemAlloc, + cl_ext::HostMemAllocName, + "cl_intel_unified_shared_memory")); + + clEnqueueMemcpyINTEL_fn USMMemcpy = + Platform->ExtFuncPtr->clEnqueueMemcpyINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &USMMemcpy, cl_ext::EnqueueMemcpyName, "cl_intel_unified_shared_memory")); + + clMemBlockingFreeINTEL_fn USMFree = + Platform->ExtFuncPtr->clMemBlockingFreeINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &USMFree, cl_ext::MemBlockingFreeName, "cl_intel_unified_shared_memory")); cl_int ClErr = CL_SUCCESS; auto HostBuffer = static_cast( @@ -284,10 +290,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( } cl_event CopyEvent = nullptr; - CL_RETURN_ON_FAILURE(USMMemcpy( - cl_adapter::cast(hQueue), false, ptr, HostBuffer, size, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - &CopyEvent)); + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(USMMemcpy(hQueue->get(), false, ptr, HostBuffer, size, + numEventsInWaitList, CLWaitEvents.data(), + &CopyEvent)); struct DeleteCallbackInfo { DeleteCallbackInfo(clMemBlockingFreeINTEL_fn USMFree, cl_context CLContext, @@ -324,7 +333,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( CL_RETURN_ON_FAILURE(ClErr); } if (phEvent) { - *phEvent = cl_adapter::cast(CopyEvent); + try { + auto UREvent = std::make_unique( + CopyEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } else { CL_RETURN_ON_FAILURE(clReleaseEvent(CopyEvent)); } @@ -338,25 +355,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } - - clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - cl_ext::EnqueueMemcpyName, &FuncPtr); - - if (FuncPtr) { - RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), blocking, pDst, - pSrc, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + ur_platform_handle_t Platform = hQueue->Context->getPlatform(); + clEnqueueMemcpyINTEL_fn clEnqueueMemcpy = + Platform->ExtFuncPtr->clEnqueueMemcpyINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clEnqueueMemcpy, + cl_ext::EnqueueMemcpyName, + "cl_intel_unified_shared_memory")); + + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + ur_result_t RetVal = mapCLErrorToUR( + clEnqueueMemcpy(hQueue->get(), blocking, pDst, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return RetVal; @@ -368,23 +391,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( [[maybe_unused]] ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - - return mapCLErrorToUR(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( + hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } + return UR_RESULT_SUCCESS; /* // Use this once impls support it. // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(cl_context), - &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); - } + cl_context CLContext = hQueue->Context; clEnqueueMigrateMemINTEL_fn FuncPtr; ur_result_t Err = cl_ext::getExtFuncFromContext( @@ -395,7 +424,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( RetVal = Err; } else { RetVal = map_cl_error_to_ur( - FuncPtr(cl_adapter::cast(hQueue), pMem, size, flags, + FuncPtr(hQueue->get(), pMem, size, flags, numEventsInWaitList, reinterpret_cast(phEventWaitList), reinterpret_cast(phEvent))); @@ -407,23 +436,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, [[maybe_unused]] size_t size, [[maybe_unused]] ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { - - return mapCLErrorToUR(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), 0, nullptr, - reinterpret_cast(phEvent))); - + cl_event Event; + CL_RETURN_ON_FAILURE( + clEnqueueMarkerWithWaitList(hQueue->get(), 0, nullptr, &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } + return UR_RESULT_SUCCESS; /* // Change to use this once drivers support it. // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, - sizeof(cl_context), - &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); - } + cl_context CLContext = hQueue->Context; clEnqueueMemAdviseINTEL_fn FuncPtr; ur_result_t Err = @@ -435,7 +466,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( RetVal = Err; } else { RetVal = - map_cl_error_to_ur(FuncPtr(cl_adapter::cast(hQueue), pMem, + map_cl_error_to_ur(FuncPtr(hQueue->get(), pMem, size, advice, 0, nullptr, reinterpret_cast(phEvent))); } */ @@ -457,29 +488,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const void *pSrc, size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr)); - - clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - cl_ext::EnqueueMemcpyName, &FuncPtr); - if (!FuncPtr) { - return RetVal; - } + ur_platform_handle_t Platform = hQueue->Context->getPlatform(); + clEnqueueMemcpyINTEL_fn clEnqueueMemcpy = + Platform->ExtFuncPtr->clEnqueueMemcpyINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clEnqueueMemcpy, + cl_ext::EnqueueMemcpyName, + "cl_intel_unified_shared_memory")); std::vector Events(height); for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { cl_event Event = nullptr; - auto ClResult = - FuncPtr(cl_adapter::cast(hQueue), false, - static_cast(pDst) + dstPitch * HeightIndex, - static_cast(pSrc) + srcPitch * HeightIndex, - width, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), &Event); + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + auto ClResult = clEnqueueMemcpy( + hQueue->get(), false, + static_cast(pDst) + dstPitch * HeightIndex, + static_cast(pSrc) + srcPitch * HeightIndex, width, + numEventsInWaitList, CLWaitEvents.data(), &Event); Events[HeightIndex] = Event; if (ClResult != CL_SUCCESS) { for (const auto &E : Events) { @@ -493,9 +521,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ClResult = clWaitForEvents(Events.size(), Events.data()); } if (phEvent && ClResult == CL_SUCCESS) { - ClResult = clEnqueueBarrierWithWaitList( - cl_adapter::cast(hQueue), Events.size(), - Events.data(), cl_adapter::cast(phEvent)); + cl_event Event; + ClResult = clEnqueueBarrierWithWaitList(hQueue->get(), Events.size(), + Events.data(), &Event); + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } } for (const auto &E : Events) { CL_RETURN_ON_FAILURE(clReleaseEvent(E)); @@ -519,16 +558,16 @@ mapCLUSMTypeToUR(const cl_unified_shared_memory_type_intel &Type) { } } -UR_APIEXPORT ur_result_t UR_APICALL -urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, - ur_usm_alloc_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( + ur_context_handle_t Context, const void *pMem, ur_usm_alloc_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, - cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); + ur_platform_handle_t Platform = Context->getPlatform(); + clGetMemAllocInfoINTEL_fn GetMemAllocInfo = + Platform->ExtFuncPtr->clGetMemAllocInfoINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&GetMemAllocInfo, + cl_ext::GetMemAllocInfoName, + "cl_intel_unified_shared_memory")); cl_mem_info_intel PropNameCL; switch (propName) { @@ -547,11 +586,13 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, default: return UR_RESULT_ERROR_INVALID_VALUE; } - + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + if (propName == UR_USM_ALLOC_INFO_DEVICE) { + return ReturnValue(Context->Devices[0]); + } size_t CheckPropSize = 0; - cl_int ClErr = - GetMemAllocInfo(cl_adapter::cast(hContext), pMem, PropNameCL, - propSize, pPropValue, &CheckPropSize); + cl_int ClErr = GetMemAllocInfo(Context->get(), pMem, PropNameCL, propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; }