From dc9661fc17cc76b5b791902c550ce599a1dd59c5 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Thu, 30 Nov 2023 15:36:16 +0000 Subject: [PATCH 01/19] Add platform handle to opencl --- source/adapters/opencl/adapter.cpp | 9 +- source/adapters/opencl/device.cpp | 2 +- source/adapters/opencl/platform.cpp | 34 ++++- source/adapters/opencl/platform.hpp | 188 ++++++++++++++++++++++++++++ 4 files changed, 219 insertions(+), 14 deletions(-) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 8ae1e77755..31b564ae35 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -22,10 +22,7 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { std::lock_guard Lock{adapter.Mutex}; - if (adapter.RefCount++ == 0) { - cl_ext::ExtFuncPtrCache = std::make_unique(); - } - + adapter.RefCount++; *phAdapters = &adapter; } @@ -43,9 +40,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { std::lock_guard Lock{adapter.Mutex}; - if (--adapter.RefCount == 0) { - cl_ext::ExtFuncPtrCache.reset(); - } + --adapter.RefCount; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 115b9b2e09..d8f01e2261 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -81,7 +81,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, return UR_RESULT_ERROR_INVALID_ENUMERATION; } - cl_int Result = clGetDeviceIDs(cl_adapter::cast(hPlatform), + cl_int Result = clGetDeviceIDs(hPlatform->get(), Type, cl_adapter::cast(NumEntries), cl_adapter::cast(phDevices), cl_adapter::cast(pNumDevices)); diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 7188a3e8f0..edfbe9237a 100644 --- 
a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -28,6 +28,23 @@ ur_result_t cl_adapter::getPlatformVersion(cl_platform_id Plat, return UR_RESULT_SUCCESS; } +ur_result_t ur_platform_handle_t_::getPlatformVersion(oclv::OpenCLVersion &Version) { + + size_t PlatVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); + + std::string PlatVer(PlatVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, PlatVerSize, + PlatVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(PlatVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_PLATFORM; + } + + return UR_RESULT_SUCCESS; +} static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { @@ -63,7 +80,7 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, case UR_PLATFORM_INFO_EXTENSIONS: case UR_PLATFORM_INFO_PROFILE: { CL_RETURN_ON_FAILURE( - clGetPlatformInfo(cl_adapter::cast(hPlatform), + clGetPlatformInfo(hPlatform->get(), CLPropName, propSize, pPropValue, pSizeRet)); return UR_RESULT_SUCCESS; } @@ -83,11 +100,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { + std::vector CLPlatforms(NumEntries); cl_int Result = clGetPlatformIDs(cl_adapter::cast(NumEntries), - cl_adapter::cast(phPlatforms), + CLPlatforms.data(), cl_adapter::cast(pNumPlatforms)); - /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ if (Result == CL_PLATFORM_NOT_FOUND_KHR) { Result = CL_SUCCESS; @@ -95,20 +112,25 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, *pNumPlatforms = 0; } } - + if (NumEntries) { + for (uint32_t i = 0; i < NumEntries; i++) { + phPlatforms[i] = new ur_platform_handle_t_(CLPlatforms[i]); + } + } return mapCLErrorToUR(Result); } UR_APIEXPORT ur_result_t UR_APICALL 
urPlatformGetNativeHandle( ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { - *phNativePlatform = reinterpret_cast(hPlatform); + *phNativePlatform = reinterpret_cast(hPlatform->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( ur_native_handle_t hNativePlatform, const ur_platform_native_properties_t *, ur_platform_handle_t *phPlatform) { - *phPlatform = reinterpret_cast(hNativePlatform); + cl_platform_id NativeHandle = reinterpret_cast(hNativePlatform); + *phPlatform = new ur_platform_handle_t_(NativeHandle); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 31fd69de14..86eab8ad48 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -14,4 +14,192 @@ namespace cl_adapter { ur_result_t getPlatformVersion(cl_platform_id Plat, oclv::OpenCLVersion &Version); + +// Older versions of GCC don't like "const" here +#if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2)) +#define CONSTFIX constexpr +#else +#define CONSTFIX const +#endif + +// Names of USM functions that are queried from OpenCL +CONSTFIX char HostMemAllocName[] = "clHostMemAllocINTEL"; +CONSTFIX char DeviceMemAllocName[] = "clDeviceMemAllocINTEL"; +CONSTFIX char SharedMemAllocName[] = "clSharedMemAllocINTEL"; +CONSTFIX char MemBlockingFreeName[] = "clMemBlockingFreeINTEL"; +CONSTFIX char CreateBufferWithPropertiesName[] = + "clCreateBufferWithPropertiesINTEL"; +CONSTFIX char SetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; +CONSTFIX char EnqueueMemFillName[] = "clEnqueueMemFillINTEL"; +CONSTFIX char EnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; +CONSTFIX char GetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; +CONSTFIX char SetProgramSpecializationConstantName[] = + "clSetProgramSpecializationConstant"; +CONSTFIX char GetDeviceFunctionPointerName[] = + 
"clGetDeviceFunctionPointerINTEL"; +CONSTFIX char EnqueueWriteGlobalVariableName[] = + "clEnqueueWriteGlobalVariableINTEL"; +CONSTFIX char EnqueueReadGlobalVariableName[] = + "clEnqueueReadGlobalVariableINTEL"; +// Names of host pipe functions queried from OpenCL +CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; +CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; +// Names of command buffer functions queried from OpenCL +CONSTFIX char CreateCommandBufferName[] = "clCreateCommandBufferKHR"; +CONSTFIX char RetainCommandBufferName[] = "clRetainCommandBufferKHR"; +CONSTFIX char ReleaseCommandBufferName[] = "clReleaseCommandBufferKHR"; +CONSTFIX char FinalizeCommandBufferName[] = "clFinalizeCommandBufferKHR"; +CONSTFIX char CommandNRRangeKernelName[] = "clCommandNDRangeKernelKHR"; +CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR"; +CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; +CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; +CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; + +#undef CONSTFIX + +using clGetDeviceFunctionPointer_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, + const char *FuncName, cl_ulong *ret_ptr); + +using clEnqueueWriteGlobalVariable_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, + size_t, size_t, const void *, cl_uint, const cl_event *, + cl_event *); + +using clEnqueueReadGlobalVariable_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, + size_t, size_t, void *, cl_uint, const cl_event *, + cl_event *); + +using clSetProgramSpecializationConstant_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value); + +using clEnqueueReadHostPipeINTEL_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, + const 
char *pipe_symbol, cl_bool blocking, void *ptr, + size_t size, cl_uint num_events_in_waitlist, + const cl_event *events_waitlist, cl_event *event); + +using clEnqueueWriteHostPipeINTEL_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, + const char *pipe_symbol, cl_bool blocking, + const void *ptr, size_t size, + cl_uint num_events_in_waitlist, + const cl_event *events_waitlist, cl_event *event); + +using clCreateCommandBufferKHR_fn = CL_API_ENTRY cl_command_buffer_khr( + CL_API_CALL *)(cl_uint num_queues, const cl_command_queue *queues, + const cl_command_buffer_properties_khr *properties, + cl_int *errcode_ret); + +using clRetainCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clReleaseCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clFinalizeCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clCommandNDRangeKernelKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr *properties, + cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, + const size_t *global_work_size, const size_t *local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandCopyBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, + size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandCopyBufferRectKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + 
cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, + const size_t *dst_origin, const size_t *region, size_t src_row_pitch, + size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandFillBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, + size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clEnqueueCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +struct ExtFuncPtrT { + clHostMemAllocINTEL_fn clHostMemAllocINTELCache; + clDeviceMemAllocINTEL_fn clDeviceMemAllocINTELCache; + clSharedMemAllocINTEL_fn clSharedMemAllocINTELCache; + clGetDeviceFunctionPointer_fn clGetDeviceFunctionPointerCache; + clCreateBufferWithPropertiesINTEL_fn + clCreateBufferWithPropertiesINTELCache; + clMemBlockingFreeINTEL_fn clMemBlockingFreeINTELCache; + clSetKernelArgMemPointerINTEL_fn + clSetKernelArgMemPointerINTELCache; + clEnqueueMemFillINTEL_fn clEnqueueMemFillINTELCache; + clEnqueueMemcpyINTEL_fn clEnqueueMemcpyINTELCache; + clGetMemAllocInfoINTEL_fn clGetMemAllocInfoINTELCache; + clEnqueueWriteGlobalVariable_fn + clEnqueueWriteGlobalVariableCache; + clEnqueueReadGlobalVariable_fn clEnqueueReadGlobalVariableCache; + clEnqueueReadHostPipeINTEL_fn clEnqueueReadHostPipeINTELCache; + clEnqueueWriteHostPipeINTEL_fn clEnqueueWriteHostPipeINTELCache; + 
clSetProgramSpecializationConstant_fn + clSetProgramSpecializationConstantCache; + clCreateCommandBufferKHR_fn clCreateCommandBufferKHRCache; + clRetainCommandBufferKHR_fn clRetainCommandBufferKHRCache; + clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHRCache; + clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHRCache; + clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHRCache; + clCommandCopyBufferKHR_fn clCommandCopyBufferKHRCache; + clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHRCache; + clCommandFillBufferKHR_fn clCommandFillBufferKHRCache; + clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHRCache; +}; } + +struct ur_platform_handle_t_ { + using native_type = cl_platform_id; + native_type Platform; + std::unique_ptr ExtFuncPtr; + + ur_platform_handle_t_(native_type Plat):Platform(Plat) { + std::make_unique(); + } + + ~ur_platform_handle_t_() { + ExtFuncPtr.reset(); + } + + ur_result_t getPlatformVersion(oclv::OpenCLVersion &Version); + + template + ur_result_t getExtFunc(T CachedExtFunc, const char *FuncName, T *Fptr) { + if (!CachedExtFunc) { + // TODO: check that the function is available + CachedExtFunc = reinterpret_cast( + clGetExtensionFunctionAddressForPlatform(Platform, FuncName)); + if (!CachedExtFunc) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } + *Fptr = CachedExtFunc; + return UR_RESULT_SUCCESS; + } + + native_type get() { return Platform; } +}; From d36b13ba7a11852121212a886056840f86155e7d Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Thu, 30 Nov 2023 17:48:46 +0000 Subject: [PATCH 02/19] Add device handle to opencl --- source/adapters/opencl/context.hpp | 19 ++++ source/adapters/opencl/device.cpp | 159 ++++++++++++---------------- source/adapters/opencl/device.hpp | 13 +++ source/adapters/opencl/kernel.cpp | 19 ++-- source/adapters/opencl/platform.cpp | 39 +++---- source/adapters/opencl/platform.hpp | 64 +++++------ source/adapters/opencl/program.cpp | 18 ++-- source/adapters/opencl/queue.cpp | 18 ++-- 
source/adapters/opencl/usm.cpp | 13 +-- 9 files changed, 175 insertions(+), 187 deletions(-) diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 5319f68b55..2cbdb2a292 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -16,3 +16,22 @@ ur_result_t getDevicesFromContext(ur_context_handle_t hContext, std::unique_ptr> &DevicesInCtx); } + +// struct ur_context_handle_t_ { +// using native_type = cl_context; +// native_type Context; +// std::atomic_uint32_t RefCount; +// ur_platform_handle_t Platform; + +// ur_context_handle_t_(native_type Ctx):Context(Ctx) {} + +// ~ur_context_handle_t_() {} + +// native_type get() { return Context; } + +// uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + +// uint32_t decrementReferenceCount() noexcept { return --RefCount; } + +// uint32_t getReferenceCount() const noexcept { return RefCount; } +// }; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index d8f01e2261..d76192a4e0 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -75,16 +75,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, Type = CL_DEVICE_TYPE_ACCELERATOR; break; case UR_DEVICE_TYPE_DEFAULT: - Type = UR_DEVICE_TYPE_DEFAULT; + Type = CL_DEVICE_TYPE_DEFAULT; break; default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } - cl_int Result = clGetDeviceIDs(hPlatform->get(), - Type, cl_adapter::cast(NumEntries), - cl_adapter::cast(phDevices), - cl_adapter::cast(pNumDevices)); + std::vector CLDevices(NumEntries); + cl_int Result = clGetDeviceIDs( + hPlatform->get(), Type, cl_adapter::cast(NumEntries), + CLDevices.data(), cl_adapter::cast(pNumDevices)); // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices if (Result == CL_DEVICE_NOT_FOUND) { @@ -93,6 +93,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, *pNumDevices = 0; } } + 
if (NumEntries && phDevices) { + for (uint32_t i = 0; i < NumEntries; i++) { + phDevices[i] = new ur_device_handle_t_(CLDevices[i], hPlatform); + } + } return mapCLErrorToUR(Result); } @@ -325,9 +330,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: { cl_device_type CLType; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_device_type), &CLType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->get(), CLPropName, sizeof(cl_device_type), &CLType, nullptr)); /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not * enough information in the OpenCL runtime to know exactly which type it @@ -348,24 +352,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_DEVICE_ID: { bool Supported = false; CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_pci_bus_info"}, - Supported)); + hDevice->get(), {"cl_khr_pci_bus_info"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } cl_device_pci_bus_info_khr PciInfo = {}; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_PCI_BUS_INFO_KHR, - sizeof(PciInfo), &PciInfo, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), + CL_DEVICE_PCI_BUS_INFO_KHR, + sizeof(PciInfo), &PciInfo, nullptr)); return ReturnValue(PciInfo.pci_device); } case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { oclv::OpenCLVersion Version; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), Version)); + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), Version)); const std::string Results = std::to_string(Version.getMajor()) + "." 
+ std::to_string(Version.getMinor()); @@ -374,14 +376,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, - nullptr, &CLSize)); + clGetDeviceInfo(hDevice->get(), CLPropName, 0, nullptr, &CLSize)); const size_t NProperties = CLSize / sizeof(cl_device_partition_property); std::vector CLValue(NProperties); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - CLSize, CLValue.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, CLSize, + CLValue.data(), nullptr)); /* The OpenCL implementation returns a value of 0 if no properties are * supported. UR will return a size of 0 for now. @@ -403,8 +403,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, - nullptr, &CLSize)); + clGetDeviceInfo(hDevice->get(), CLPropName, 0, nullptr, &CLSize)); const size_t NProperties = CLSize / sizeof(cl_device_partition_property); /* The OpenCL implementation returns either a size of 0 or a value of 0 if @@ -419,8 +418,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, auto CLValue = reinterpret_cast(alloca(CLSize)); CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - CLSize, CLValue, nullptr)); + clGetDeviceInfo(hDevice->get(), CLPropName, CLSize, CLValue, nullptr)); std::vector URValue(NProperties - 1); @@ -472,14 +470,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* Corresponding OpenCL query is only available starting with OpenCL 2.1 * and we have to emulate it on older OpenCL runtimes. 
*/ oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); if (DevVer >= oclv::V2_1) { cl_uint CLValue; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_MAX_NUM_SUB_GROUPS, - sizeof(cl_uint), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), + CL_DEVICE_MAX_NUM_SUB_GROUPS, + sizeof(cl_uint), &CLValue, nullptr)); if (CLValue == 0u) { /* OpenCL returns 0 if sub-groups are not supported, but SYCL 2020 @@ -503,7 +500,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { bool Supported; CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_fp16"}, Supported)); + hDevice->get(), {"cl_khr_fp16"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -511,9 +508,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } cl_device_fp_config CLValue; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_device_fp_config), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, + sizeof(cl_device_fp_config), &CLValue, + nullptr)); return ReturnValue(mapCLDeviceFpConfigToUR(CLValue)); } @@ -522,8 +519,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* This query is missing before OpenCL 3.0. Check version and handle * appropriately */ oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); /* Minimum required capability to be returned. 
For OpenCL 1.2, this is all * that is required */ @@ -534,8 +530,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* For OpenCL >=3.0, the query should be implemented */ cl_device_atomic_capabilities CLCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + hDevice->get(), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); /* Mask operation to only consider atomic_memory_order* capabilities */ @@ -581,14 +576,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + hDevice->get(), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && @@ -634,14 +627,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + hDevice->get(), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && @@ -683,14 +674,12 @@ 
UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), - CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + hDevice->get(), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && @@ -737,7 +726,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ATOMIC_64: { bool Supported = false; CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + hDevice->get(), {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, Supported)); @@ -746,31 +735,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DevType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, + nullptr)); return ReturnValue(DevType == CL_DEVICE_TYPE_GPU); } case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { bool Supported = false; CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), - {"cl_intel_mem_channel_property"}, Supported)); + hDevice->get(), {"cl_intel_mem_channel_property"}, Supported)); return ReturnValue(Supported); } case UR_DEVICE_INFO_ESIMD_SUPPORT: { bool Supported = false; cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE( - 
clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DevType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DevType, + nullptr)); cl_uint VendorID = 0; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - cl_adapter::cast(hDevice), CL_DEVICE_VENDOR_ID, - sizeof(VendorID), &VendorID, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_VENDOR_ID, + sizeof(VendorID), &VendorID, nullptr)); /* ESIMD is only supported by Intel GPUs. */ Supported = DevType == CL_DEVICE_TYPE_GPU && VendorID == 0x8086; @@ -783,8 +770,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { bool Supported = false; CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), - {"cl_intel_program_scope_host_pipe"}, Supported)); + hDevice->get(), {"cl_intel_program_scope_host_pipe"}, Supported)); return ReturnValue(Supported); } case UR_DEVICE_INFO_QUEUE_PROPERTIES: @@ -803,9 +789,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_flags_t (uint32_t) */ cl_bitfield CLValue = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bitfield), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->get(), CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); /* We can just static_cast the output because OpenCL and UR bitfields * map 1 to 1 for these properties. 
cl_bitfield is uint64_t and ur_flags_t @@ -826,9 +811,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_bool_t */ cl_bool CLValue; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bool), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, + sizeof(cl_bool), &CLValue, nullptr)); /* cl_bool is uint32_t and ur_bool_t is bool */ return ReturnValue(static_cast(CLValue)); @@ -901,14 +885,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * | ur_device_handle_t | cl_device_id | 8 | */ - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - propSize, pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, propSize, + pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } case UR_DEVICE_INFO_EXTENSIONS: { - cl_device_id Dev = cl_adapter::cast(hDevice); + cl_device_id Dev = hDevice->get(); size_t ExtSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); @@ -1017,9 +1000,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CLProperties[CLProperties.size() - 1] = 0; cl_uint CLNumDevicesRet; - CL_RETURN_ON_FAILURE( - clCreateSubDevices(cl_adapter::cast(hDevice), - CLProperties.data(), 0, nullptr, &CLNumDevicesRet)); + CL_RETURN_ON_FAILURE(clCreateSubDevices(hDevice->get(), CLProperties.data(), + 0, nullptr, &CLNumDevicesRet)); if (pNumDevicesRet) { *pNumDevicesRet = CLNumDevicesRet; @@ -1029,9 +1011,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( * function shall only retrieve that number of sub-devices. 
*/ if (phSubDevices) { std::vector CLSubDevices(CLNumDevicesRet); - CL_RETURN_ON_FAILURE(clCreateSubDevices( - cl_adapter::cast(hDevice), CLProperties.data(), - CLNumDevicesRet, CLSubDevices.data(), nullptr)); + CL_RETURN_ON_FAILURE(clCreateSubDevices(hDevice->get(), CLProperties.data(), + CLNumDevicesRet, + CLSubDevices.data(), nullptr)); std::memcpy(phSubDevices, CLSubDevices.data(), sizeof(cl_device_id) * NumDevices); @@ -1042,7 +1024,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { - cl_int Result = clRetainDevice(cl_adapter::cast(hDevice)); + cl_int Result = clRetainDevice(hDevice->get()); return mapCLErrorToUR(Result); } @@ -1050,7 +1032,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { UR_APIEXPORT ur_result_t UR_APICALL urDeviceRelease(ur_device_handle_t hDevice) { - cl_int Result = clReleaseDevice(cl_adapter::cast(hDevice)); + cl_int Result = clReleaseDevice(hDevice->get()); return mapCLErrorToUR(Result); } @@ -1058,15 +1040,15 @@ urDeviceRelease(ur_device_handle_t hDevice) { UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( ur_device_handle_t hDevice, ur_native_handle_t *phNativeDevice) { - *phNativeDevice = reinterpret_cast(hDevice); + *phNativeDevice = reinterpret_cast(hDevice->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t hNativeDevice, ur_platform_handle_t, + ur_native_handle_t hNativeDevice, ur_platform_handle_t hPlatform, const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { - - *phDevice = reinterpret_cast(hNativeDevice); + cl_device_id NativeHandle = reinterpret_cast(hNativeDevice); + *phDevice = new ur_device_handle_t_(NativeHandle, hPlatform); return UR_RESULT_SUCCESS; } @@ -1075,7 +1057,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( uint64_t *pHostTimestamp) { oclv::OpenCLVersion DevVer, 
PlatVer; cl_platform_id Platform; - cl_device_id DeviceId = cl_adapter::cast(hDevice); + cl_device_id DeviceId = hDevice->get(); // TODO: Cache OpenCL version for each device and platform auto RetErr = clGetDeviceInfo(DeviceId, CL_DEVICE_PLATFORM, @@ -1125,9 +1107,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( // Get the type of the device cl_device_type DeviceType; constexpr uint32_t InvalidInd = std::numeric_limits::max(); - cl_int RetErr = - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(cl_device_type), &DeviceType, nullptr); + cl_int RetErr = clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + sizeof(cl_device_type), &DeviceType, nullptr); if (RetErr != CL_SUCCESS) { *pSelectedBinary = InvalidInd; CL_RETURN_ON_FAILURE(RetErr); diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 548a5012f9..5d30bf72c9 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -18,3 +18,16 @@ ur_result_t checkDeviceExtensions(cl_device_id Dev, const std::vector &Exts, bool &Supported); } // namespace cl_adapter + +struct ur_device_handle_t_ { + using native_type = cl_device_id; + native_type Device; + ur_platform_handle_t Platform; + + ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat) + : Device(Dev), Platform(Plat) {} + + ~ur_device_handle_t_() {} + + native_type get() { return Device; } +}; \ No newline at end of file diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 44157b826b..adf87279d0 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// #include "common.hpp" +#include "device.hpp" #include #include @@ -135,16 +136,15 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, // to deter naive use of the query. 
if (propName == UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) { cl_device_type ClDeviceType; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, - sizeof(ClDeviceType), &ClDeviceType, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + sizeof(ClDeviceType), &ClDeviceType, + nullptr)); if (ClDeviceType != CL_DEVICE_TYPE_CUSTOM) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } } CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), + cl_adapter::cast(hKernel), hDevice->get(), mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; @@ -197,11 +197,10 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, InputValueSize = MaxDims * sizeof(size_t); } - cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast(hKernel), - cl_adapter::cast(hDevice), - mapURKernelSubGroupInfoToCL(propName), - InputValueSize, InputValue.get(), - sizeof(size_t), &RetVal, pPropSizeRet); + cl_int Ret = clGetKernelSubGroupInfo( + cl_adapter::cast(hKernel), hDevice->get(), + mapURKernelSubGroupInfoToCL(propName), InputValueSize, InputValue.get(), + sizeof(size_t), &RetVal, pPropSizeRet); if (Ret == CL_INVALID_OPERATION) { // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index edfbe9237a..e715783ac3 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -28,23 +28,6 @@ ur_result_t cl_adapter::getPlatformVersion(cl_platform_id Plat, return UR_RESULT_SUCCESS; } -ur_result_t ur_platform_handle_t_::getPlatformVersion(oclv::OpenCLVersion &Version) { - - size_t PlatVerSize = 0; - CL_RETURN_ON_FAILURE( - clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); - - std::string PlatVer(PlatVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, 
CL_PLATFORM_VERSION, PlatVerSize, - PlatVer.data(), nullptr)); - - Version = oclv::OpenCLVersion(PlatVer); - if (!Version.isValid()) { - return UR_RESULT_ERROR_INVALID_PLATFORM; - } - - return UR_RESULT_SUCCESS; -} static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { @@ -79,9 +62,13 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, case UR_PLATFORM_INFO_VERSION: case UR_PLATFORM_INFO_EXTENSIONS: case UR_PLATFORM_INFO_PROFILE: { - CL_RETURN_ON_FAILURE( - clGetPlatformInfo(hPlatform->get(), - CLPropName, propSize, pPropValue, pSizeRet)); + cl_platform_id Plat = nullptr; + if (hPlatform) { + Plat = hPlatform->get(); + } + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Plat, CLPropName, + propSize, pPropValue, pSizeRet)); + return UR_RESULT_SUCCESS; } default: @@ -101,10 +88,9 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { std::vector CLPlatforms(NumEntries); - cl_int Result = - clGetPlatformIDs(cl_adapter::cast(NumEntries), - CLPlatforms.data(), - cl_adapter::cast(pNumPlatforms)); + cl_int Result = clGetPlatformIDs(cl_adapter::cast(NumEntries), + CLPlatforms.data(), + cl_adapter::cast(pNumPlatforms)); /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ if (Result == CL_PLATFORM_NOT_FOUND_KHR) { Result = CL_SUCCESS; @@ -112,7 +98,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, *pNumPlatforms = 0; } } - if (NumEntries) { + if (NumEntries && phPlatforms) { for (uint32_t i = 0; i < NumEntries; i++) { phPlatforms[i] = new ur_platform_handle_t_(CLPlatforms[i]); } @@ -129,7 +115,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( ur_native_handle_t hNativePlatform, const ur_platform_native_properties_t *, ur_platform_handle_t *phPlatform) { - cl_platform_id NativeHandle = reinterpret_cast(hNativePlatform); + 
cl_platform_id NativeHandle = + reinterpret_cast(hNativePlatform); *phPlatform = new ur_platform_handle_t_(NativeHandle); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 86eab8ad48..9de49ac063 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -145,21 +145,17 @@ struct ExtFuncPtrT { clDeviceMemAllocINTEL_fn clDeviceMemAllocINTELCache; clSharedMemAllocINTEL_fn clSharedMemAllocINTELCache; clGetDeviceFunctionPointer_fn clGetDeviceFunctionPointerCache; - clCreateBufferWithPropertiesINTEL_fn - clCreateBufferWithPropertiesINTELCache; + clCreateBufferWithPropertiesINTEL_fn clCreateBufferWithPropertiesINTELCache; clMemBlockingFreeINTEL_fn clMemBlockingFreeINTELCache; - clSetKernelArgMemPointerINTEL_fn - clSetKernelArgMemPointerINTELCache; + clSetKernelArgMemPointerINTEL_fn clSetKernelArgMemPointerINTELCache; clEnqueueMemFillINTEL_fn clEnqueueMemFillINTELCache; clEnqueueMemcpyINTEL_fn clEnqueueMemcpyINTELCache; clGetMemAllocInfoINTEL_fn clGetMemAllocInfoINTELCache; - clEnqueueWriteGlobalVariable_fn - clEnqueueWriteGlobalVariableCache; + clEnqueueWriteGlobalVariable_fn clEnqueueWriteGlobalVariableCache; clEnqueueReadGlobalVariable_fn clEnqueueReadGlobalVariableCache; clEnqueueReadHostPipeINTEL_fn clEnqueueReadHostPipeINTELCache; clEnqueueWriteHostPipeINTEL_fn clEnqueueWriteHostPipeINTELCache; - clSetProgramSpecializationConstant_fn - clSetProgramSpecializationConstantCache; + clSetProgramSpecializationConstant_fn clSetProgramSpecializationConstantCache; clCreateCommandBufferKHR_fn clCreateCommandBufferKHRCache; clRetainCommandBufferKHR_fn clRetainCommandBufferKHRCache; clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHRCache; @@ -170,36 +166,32 @@ struct ExtFuncPtrT { clCommandFillBufferKHR_fn clCommandFillBufferKHRCache; clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHRCache; }; -} +} // namespace cl_adapter struct ur_platform_handle_t_ { - using 
native_type = cl_platform_id; - native_type Platform; - std::unique_ptr ExtFuncPtr; - - ur_platform_handle_t_(native_type Plat):Platform(Plat) { - std::make_unique(); - } - - ~ur_platform_handle_t_() { - ExtFuncPtr.reset(); - } - - ur_result_t getPlatformVersion(oclv::OpenCLVersion &Version); - - template - ur_result_t getExtFunc(T CachedExtFunc, const char *FuncName, T *Fptr) { - if (!CachedExtFunc) { - // TODO: check that the function is available - CachedExtFunc = reinterpret_cast( - clGetExtensionFunctionAddressForPlatform(Platform, FuncName)); - if (!CachedExtFunc) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } - *Fptr = CachedExtFunc; - return UR_RESULT_SUCCESS; + using native_type = cl_platform_id; + native_type Platform; + std::unique_ptr ExtFuncPtr; + + ur_platform_handle_t_(native_type Plat) : Platform(Plat) { + std::make_unique(); + } + + ~ur_platform_handle_t_() { ExtFuncPtr.reset(); } + + template + ur_result_t getExtFunc(T CachedExtFunc, const char *FuncName, T *Fptr) { + if (!CachedExtFunc) { + // TODO: check that the function is available + CachedExtFunc = reinterpret_cast( + clGetExtensionFunctionAddressForPlatform(Platform, FuncName)); + if (!CachedExtFunc) { + return UR_RESULT_ERROR_INVALID_VALUE; + } } + *Fptr = CachedExtFunc; + return UR_RESULT_SUCCESS; + } - native_type get() { return Platform; } + native_type get() { return Platform; } }; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index d76cd0b768..b2a027e640 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -120,7 +120,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( const uint8_t *pBinary, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - const cl_device_id Devices[1] = {cl_adapter::cast(hDevice)}; + const cl_device_id Devices[1] = {hDevice->get()}; const size_t Lengths[1] = {size}; cl_int BinaryStatus[1]; cl_int CLResult; @@ -287,17 +287,16 @@ 
urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( - cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), + cl_adapter::cast(hProgram), hDevice->get(), mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), &BinaryType, nullptr)); return ReturnValue(mapCLBinaryTypeToUR(BinaryType)); } size_t CheckPropSize = 0; - cl_int ClErr = clGetProgramBuildInfo(cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), - mapURProgramBuildInfoToCL(propName), - propSize, pPropValue, &CheckPropSize); + cl_int ClErr = + clGetProgramBuildInfo(cl_adapter::cast(hProgram), + hDevice->get(), mapURProgramBuildInfoToCL(propName), + propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -480,9 +479,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( } const cl_int CLResult = - FuncT(cl_adapter::cast(hDevice), - cl_adapter::cast(hProgram), pFunctionName, - reinterpret_cast(ppFunctionPointer)); + FuncT(hDevice->get(), cl_adapter::cast(hProgram), + pFunctionName, reinterpret_cast(ppFunctionPointer)); // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address // cannot be found but the kernel exists. As the kernel does exist, return // that the function name is invalid. 
diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 4a39a91ef5..ecdcd97b08 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -7,6 +7,7 @@ //===-----------------------------------------------------------------===// #include "common.hpp" +#include "device.hpp" #include "platform.hpp" cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { @@ -73,9 +74,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_platform_id CurPlatform; CL_RETURN_ON_FAILURE_AND_SET_NULL( - clGetDeviceInfo(cl_adapter::cast(hDevice), - CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &CurPlatform, - nullptr), + clGetDeviceInfo(hDevice->get(), CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &CurPlatform, nullptr), phQueue); cl_command_queue_properties CLProperties = @@ -93,10 +93,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_int RetErr = CL_INVALID_OPERATION; if (Version < oclv::V2_0) { - *phQueue = cl_adapter::cast( - clCreateCommandQueue(cl_adapter::cast(hContext), - cl_adapter::cast(hDevice), - CLProperties & SupportByOpenCL, &RetErr)); + *phQueue = cl_adapter::cast(clCreateCommandQueue( + cl_adapter::cast(hContext), hDevice->get(), + CLProperties & SupportByOpenCL, &RetErr)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -106,9 +105,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; *phQueue = cl_adapter::cast(clCreateCommandQueueWithProperties( - cl_adapter::cast(hContext), - cl_adapter::cast(hDevice), CreationFlagProperties, - &RetErr)); + cl_adapter::cast(hContext), hDevice->get(), + CreationFlagProperties, &RetErr)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 0d64f23d13..3c3e07a98c 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -9,6 +9,7 @@ 
//===----------------------------------------------------------------------===// #include "common.hpp" +#include "device.hpp" inline cl_mem_alloc_flags_intel hostDescToClFlags(const ur_usm_host_desc_t &desc) { @@ -146,9 +147,9 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, &ClResult); + Ptr = FuncPtr(CLContext, hDevice->get(), + AllocProperties.empty() ? nullptr : AllocProperties.data(), size, + Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -189,9 +190,9 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), - AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, cl_adapter::cast(&ClResult)); + Ptr = FuncPtr(CLContext, hDevice->get(), + AllocProperties.empty() ? 
nullptr : AllocProperties.data(), size, + Alignment, cl_adapter::cast(&ClResult)); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } From cc70b3cba050213ef1b701f3278c8dd6b2605095 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Tue, 5 Dec 2023 10:52:29 +0000 Subject: [PATCH 03/19] Match cuda --- source/adapters/opencl/adapter.cpp | 10 ++++- source/adapters/opencl/command_buffer.cpp | 19 +++++----- source/adapters/opencl/context.cpp | 46 ++++++++++++----------- source/adapters/opencl/context.hpp | 31 +++++++-------- source/adapters/opencl/device.cpp | 39 ++++++++++--------- source/adapters/opencl/memory.cpp | 7 ++-- source/adapters/opencl/platform.hpp | 23 +++++++++++- source/adapters/opencl/program.cpp | 8 ++-- source/adapters/opencl/queue.cpp | 5 ++- source/adapters/opencl/sampler.cpp | 3 +- source/adapters/opencl/usm.cpp | 13 ++++--- 11 files changed, 121 insertions(+), 83 deletions(-) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 31b564ae35..763c6d532d 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -22,7 +22,10 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { std::lock_guard Lock{adapter.Mutex}; - adapter.RefCount++; + // adapter.RefCount++; + if (adapter.RefCount++ == 0) { + cl_ext::ExtFuncPtrCache = std::make_unique(); + } *phAdapters = &adapter; } @@ -40,7 +43,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { std::lock_guard Lock{adapter.Mutex}; - --adapter.RefCount; + // --adapter.RefCount; + if (--adapter.RefCount == 0) { + cl_ext::ExtFuncPtrCache.reset(); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 88c661b4ae..97c91e4672 100644 --- 
a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -10,6 +10,7 @@ #include "command_buffer.hpp" #include "common.hpp" +#include "context.hpp" UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, @@ -19,7 +20,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_queue_handle_t Queue = nullptr; UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue)); - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->get(); cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( @@ -49,7 +50,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue)); - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, @@ -66,7 +67,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue)); - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( @@ -83,7 +84,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); 
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( @@ -107,7 +108,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_exp_command_buffer_command_handle_t *) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( @@ -155,7 +156,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, @@ -191,7 +192,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; size_t OpenCLRegion[3]{region.width, region.height, region.depth}; - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( @@ -281,7 +282,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( CLContext, 
cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, @@ -337,7 +338,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index fc7dc144e3..a28e766e3d 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -18,20 +18,16 @@ ur_result_t cl_adapter::getDevicesFromContext( ur_context_handle_t hContext, std::unique_ptr> &DevicesInCtx) { - cl_uint DeviceCount; - CL_RETURN_ON_FAILURE(clGetContextInfo(cl_adapter::cast(hContext), - CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), - &DeviceCount, nullptr)); + cl_uint DeviceCount = hContext->DeviceCount; if (DeviceCount < 1) { return UR_RESULT_ERROR_INVALID_CONTEXT; } DevicesInCtx = std::make_unique>(DeviceCount); - - CL_RETURN_ON_FAILURE(clGetContextInfo( - cl_adapter::cast(hContext), CL_CONTEXT_DEVICES, - DeviceCount * sizeof(cl_device_id), (*DevicesInCtx).data(), nullptr)); + for (size_t i = 0; i < DeviceCount; i++) { + (*DevicesInCtx)[i] = hContext->Devices[i]->get(); + } return UR_RESULT_SUCCESS; } @@ -41,11 +37,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( const ur_context_properties_t *, ur_context_handle_t *phContext) { cl_int Ret; - *phContext = cl_adapter::cast( - clCreateContext(nullptr, cl_adapter::cast(DeviceCount), - cl_adapter::cast(phDevices), - nullptr, nullptr, cl_adapter::cast(&Ret))); + std::vector CLDevices(DeviceCount); + for (size_t i = 0; i < DeviceCount; i++) { + CLDevices[i] = phDevices[i]->get(); + } + + cl_context Ctx = clCreateContext(nullptr, cl_adapter::cast(DeviceCount), + CLDevices.data(), + nullptr, nullptr, 
cl_adapter::cast(&Ret)); + *phContext = new ur_context_handle_t_(Ctx, DeviceCount, phDevices); return mapCLErrorToUR(Ret); } @@ -95,7 +96,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_REFERENCE_COUNT: { size_t CheckPropSize = 0; auto ClResult = - clGetContextInfo(cl_adapter::cast(hContext), CLPropName, + clGetContextInfo(hContext->get(), CLPropName, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; @@ -114,32 +115,33 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { - cl_int Ret = clReleaseContext(cl_adapter::cast(hContext)); + cl_int Ret = clReleaseContext(hContext->get()); return mapCLErrorToUR(Ret); } UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { - cl_int Ret = clRetainContext(cl_adapter::cast(hContext)); + cl_int Ret = clRetainContext(hContext->get()); return mapCLErrorToUR(Ret); } UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { - *phNativeContext = reinterpret_cast(hContext); + *phNativeContext = reinterpret_cast(hContext->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( - ur_native_handle_t hNativeContext, uint32_t, const ur_device_handle_t *, - const ur_context_native_properties_t *pProperties, - ur_context_handle_t *phContext) { + ur_native_handle_t hNativeContext, uint32_t numDevices, const ur_device_handle_t *phDevices, + const ur_context_native_properties_t *pProperties, ur_context_handle_t *phContext) { - *phContext = reinterpret_cast(hNativeContext); + cl_context NativeHandle = + reinterpret_cast(hNativeContext); + *phContext = new ur_context_handle_t_(NativeHandle, numDevices, phDevices); if (!pProperties || 
!pProperties->isNativeHandleOwned) { - return urContextRetain(*phContext); + return clRetainContext(NativeHandle); } return UR_RESULT_SUCCESS; } @@ -191,7 +193,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( C->execute(); }; CL_RETURN_ON_FAILURE(clSetContextDestructorCallback( - cl_adapter::cast(hContext), ClCallback, Callback)); + hContext->get(), ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 2cbdb2a292..7156372a87 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -10,6 +10,9 @@ #pragma once #include "common.hpp" +#include "device.hpp" + +#include namespace cl_adapter { ur_result_t @@ -17,21 +20,19 @@ getDevicesFromContext(ur_context_handle_t hContext, std::unique_ptr> &DevicesInCtx); } -// struct ur_context_handle_t_ { -// using native_type = cl_context; -// native_type Context; -// std::atomic_uint32_t RefCount; -// ur_platform_handle_t Platform; - -// ur_context_handle_t_(native_type Ctx):Context(Ctx) {} - -// ~ur_context_handle_t_() {} - -// native_type get() { return Context; } +struct ur_context_handle_t_ { + using native_type = cl_context; + native_type Context; + std::vector Devices; + uint32_t DeviceCount; -// uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + ur_context_handle_t_(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices) : Context(Ctx), DeviceCount(DevCount) { + for (uint32_t i = 0; i < DeviceCount; i++) { + Devices.emplace_back(phDevices[i]); + } + } -// uint32_t decrementReferenceCount() noexcept { return --RefCount; } + ~ur_context_handle_t_() {} -// uint32_t getReferenceCount() const noexcept { return RefCount; } -// }; + native_type get() { return Context; } +}; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index d76192a4e0..6b0bd6fc21 100644 --- a/source/adapters/opencl/device.cpp +++ 
b/source/adapters/opencl/device.cpp @@ -75,31 +75,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, Type = CL_DEVICE_TYPE_ACCELERATOR; break; case UR_DEVICE_TYPE_DEFAULT: - Type = CL_DEVICE_TYPE_DEFAULT; + Type = UR_DEVICE_TYPE_DEFAULT; break; default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } - std::vector CLDevices(NumEntries); - cl_int Result = clGetDeviceIDs( - hPlatform->get(), Type, cl_adapter::cast(NumEntries), - CLDevices.data(), cl_adapter::cast(pNumDevices)); - - // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices - if (Result == CL_DEVICE_NOT_FOUND) { - Result = CL_SUCCESS; + CL_RETURN_ON_FAILURE(hPlatform->GetDevices(Type)); + size_t NumDevices = hPlatform->Devices.size(); + try { if (pNumDevices) { - *pNumDevices = 0; + *pNumDevices = NumDevices; } - } - if (NumEntries && phDevices) { - for (uint32_t i = 0; i < NumEntries; i++) { - phDevices[i] = new ur_device_handle_t_(CLDevices[i], hPlatform); + + if (phDevices) { + for (size_t i = 0; i < std::min(size_t(NumEntries), NumDevices); ++i) { + phDevices[i] = hPlatform->Devices[i]; + } } - } - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; + } catch (ur_result_t Err) { + return Err; + } catch (...) 
{ + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } } static ur_device_fp_capability_flags_t @@ -861,7 +861,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: - case UR_DEVICE_INFO_PLATFORM: case UR_DEVICE_INFO_PARENT_DEVICE: case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_NAME: @@ -890,6 +889,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_PLATFORM: { + if (hDevice->Platform && hDevice->Platform->get()) { + return ReturnValue(hDevice->Platform); + } + return UR_RESULT_ERROR_INVALID_DEVICE; + } case UR_DEVICE_INFO_EXTENSIONS: { cl_device_id Dev = hDevice->get(); size_t ExtSize = 0; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 2397e2b5f9..ae712e3315 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include "context.hpp" cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_format CLImageFormat; @@ -230,7 +231,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->get(); // First we need to look up the function pointer RetErr = cl_ext::getExtFuncFromContext( @@ -270,7 +271,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( void *HostPtr = pProperties ? 
pProperties->pHost : nullptr; *phBuffer = reinterpret_cast(clCreateBuffer( - cl_adapter::cast(hContext), static_cast(flags), + hContext->get(), static_cast(flags), size, HostPtr, cl_adapter::cast(&RetErr))); CL_RETURN_ON_FAILURE(RetErr); @@ -289,7 +290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( cl_map_flags MapFlags = convertURMemFlagsToCL(flags); *phMem = reinterpret_cast(clCreateImage( - cl_adapter::cast(hContext), MapFlags, &ImageFormat, + hContext->get(), MapFlags, &ImageFormat, &ImageDesc, pHost, cl_adapter::cast(&RetErr))); CL_RETURN_ON_FAILURE(RetErr); diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 9de49ac063..75f7577856 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -10,6 +10,9 @@ #pragma once #include "common.hpp" +#include "device.hpp" + +#include namespace cl_adapter { ur_result_t getPlatformVersion(cl_platform_id Plat, @@ -170,11 +173,12 @@ struct ExtFuncPtrT { struct ur_platform_handle_t_ { using native_type = cl_platform_id; - native_type Platform; + native_type Platform = nullptr; std::unique_ptr ExtFuncPtr; + std::vector Devices; ur_platform_handle_t_(native_type Plat) : Platform(Plat) { - std::make_unique(); + ExtFuncPtr = std::make_unique(); } ~ur_platform_handle_t_() { ExtFuncPtr.reset(); } @@ -194,4 +198,19 @@ struct ur_platform_handle_t_ { } native_type get() { return Platform; } + + ur_result_t GetDevices(cl_device_type Type) { + cl_uint DeviceNum = 0; + CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, Type, 0, nullptr, &DeviceNum)); + + std::vector CLDevices(DeviceNum); + CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, Type, DeviceNum, CLDevices.data(), nullptr)); + + Devices = std::vector(DeviceNum); + for (size_t i = 0; i < DeviceNum; i++) { + Devices[i] = new ur_device_handle_t_(CLDevices[i], this); + } + + return UR_RESULT_SUCCESS; + } }; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 
b2a027e640..bbfa630abc 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -80,7 +80,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } *phProgram = cl_adapter::cast(clCreateProgramWithIL( - cl_adapter::cast(hContext), pIL, length, &Err)); + hContext->get(), pIL, length, &Err)); CL_RETURN_ON_FAILURE(Err); } else { @@ -108,7 +108,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( assert(FuncPtr != nullptr); *phProgram = cl_adapter::cast( - FuncPtr(cl_adapter::cast(hContext), pIL, length, &Err)); + FuncPtr(hContext->get(), pIL, length, &Err)); CL_RETURN_ON_FAILURE(Err); } @@ -125,7 +125,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( cl_int BinaryStatus[1]; cl_int CLResult; *phProgram = cl_adapter::cast(clCreateProgramWithBinary( - cl_adapter::cast(hContext), cl_adapter::cast(1u), + hContext->get(), cl_adapter::cast(1u), Devices, Lengths, &pBinary, BinaryStatus, &CLResult)); CL_RETURN_ON_FAILURE(BinaryStatus[0]); CL_RETURN_ON_FAILURE(CLResult); @@ -211,7 +211,7 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, cl_int CLResult; *phProgram = cl_adapter::cast( - clLinkProgram(cl_adapter::cast(hContext), 0, nullptr, + clLinkProgram(hContext->get(), 0, nullptr, pOptions, cl_adapter::cast(count), cl_adapter::cast(phPrograms), nullptr, nullptr, &CLResult)); diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index ecdcd97b08..276c179f95 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -9,6 +9,7 @@ #include "common.hpp" #include "device.hpp" #include "platform.hpp" +#include "context.hpp" cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { @@ -94,7 +95,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( if (Version < oclv::V2_0) { *phQueue = cl_adapter::cast(clCreateCommandQueue( - cl_adapter::cast(hContext), hDevice->get(), + hContext->get(), hDevice->get(), CLProperties & 
SupportByOpenCL, &RetErr)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; @@ -105,7 +106,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; *phQueue = cl_adapter::cast(clCreateCommandQueueWithProperties( - cl_adapter::cast(hContext), hDevice->get(), + hContext->get(), hDevice->get(), CreationFlagProperties, &RetErr)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 49f31b37fd..187b87a7f1 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include "context.hpp" namespace { @@ -144,7 +145,7 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, // Always call OpenCL 1.0 API *phSampler = cl_adapter::cast(clCreateSampler( - cl_adapter::cast(hContext), + hContext->get(), static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, cl_adapter::cast(&ErrorCode))); diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 3c3e07a98c..935c099b2c 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -10,6 +10,7 @@ #include "common.hpp" #include "device.hpp" +#include "context.hpp" inline cl_mem_alloc_flags_intel hostDescToClFlags(const ur_usm_host_desc_t &desc) { @@ -95,7 +96,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, // First we need to look up the function pointer clHostMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->get(); if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &FuncPtr)) { @@ -138,7 +139,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // 
First we need to look up the function pointer clDeviceMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->get(); if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, cl_ext::DeviceMemAllocName, &FuncPtr)) { @@ -181,7 +182,7 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clSharedMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->get(); if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, cl_ext::SharedMemAllocName, &FuncPtr)) { @@ -214,7 +215,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, // might be still running. clMemBlockingFreeINTEL_fn FuncPtr = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->get(); ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, @@ -526,7 +527,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, void *pPropValue, size_t *pPropSizeRet) { clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->get(); UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); @@ -551,7 +552,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, size_t CheckPropSize = 0; cl_int ClErr = - GetMemAllocInfo(cl_adapter::cast(hContext), pMem, PropNameCL, + GetMemAllocInfo(hContext->get(), pMem, PropNameCL, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return 
UR_RESULT_ERROR_INVALID_SIZE; From 70df01a57efd5f95cebdcc98dcbc4bb6d5c87c5b Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Thu, 7 Dec 2023 10:54:44 +0000 Subject: [PATCH 04/19] Add devices init to platform --- source/adapters/opencl/device.cpp | 42 +++++++++++++++-------------- source/adapters/opencl/device.hpp | 12 +++++++-- source/adapters/opencl/platform.cpp | 36 +++++++++++++++++++++---- source/adapters/opencl/platform.hpp | 9 ++++--- 4 files changed, 68 insertions(+), 31 deletions(-) diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 6b0bd6fc21..7969fe8ab3 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -54,7 +54,7 @@ ur_result_t cl_adapter::checkDeviceExtensions( UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, ur_device_type_t DeviceType, - uint32_t NumEntries, + [[maybe_unused]] uint32_t NumEntries, ur_device_handle_t *phDevices, uint32_t *pNumDevices) { @@ -75,24 +75,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, Type = CL_DEVICE_TYPE_ACCELERATOR; break; case UR_DEVICE_TYPE_DEFAULT: - Type = UR_DEVICE_TYPE_DEFAULT; + Type = CL_DEVICE_TYPE_DEFAULT; break; default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } - - CL_RETURN_ON_FAILURE(hPlatform->GetDevices(Type)); - size_t NumDevices = hPlatform->Devices.size(); try { - if (pNumDevices) { - *pNumDevices = NumDevices; - } - - if (phDevices) { - for (size_t i = 0; i < std::min(size_t(NumEntries), NumDevices); ++i) { - phDevices[i] = hPlatform->Devices[i]; + uint32_t AllDevicesNum = hPlatform->Devices.size(); + uint32_t DeviceNumIter = 0; + for (uint32_t i = 0; i < AllDevicesNum; i++) { + cl_device_type DeviceType = hPlatform->Devices[i]->Type; + if (DeviceType == Type || Type == CL_DEVICE_TYPE_ALL) { + if (phDevices) { + phDevices[DeviceNumIter] = hPlatform->Devices[i]; + } + DeviceNumIter++; } } + if (pNumDevices) { + *pNumDevices = DeviceNumIter; + } return 
UR_RESULT_SUCCESS; } catch (ur_result_t Err) { @@ -329,9 +331,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * to UR */ switch (static_cast(propName)) { case UR_DEVICE_INFO_TYPE: { - cl_device_type CLType; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - hDevice->get(), CLPropName, sizeof(cl_device_type), &CLType, nullptr)); + cl_device_type CLType = hDevice->Type; /* TODO UR: If the device is an Accelerator (FPGA, VPU, etc.), there is not * enough information in the OpenCL runtime to know exactly which type it @@ -861,7 +861,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_PARAMETER_SIZE: case UR_DEVICE_INFO_PROFILING_TIMER_RESOLUTION: case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: - case UR_DEVICE_INFO_PARENT_DEVICE: case UR_DEVICE_INFO_IL_VERSION: case UR_DEVICE_INFO_NAME: case UR_DEVICE_INFO_VENDOR: @@ -895,6 +894,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } return UR_RESULT_ERROR_INVALID_DEVICE; } + case UR_DEVICE_INFO_PARENT_DEVICE: { + return ReturnValue(hDevice->ParentDevice); + } case UR_DEVICE_INFO_EXTENSIONS: { cl_device_id Dev = hDevice->get(); size_t ExtSize = 0; @@ -1019,9 +1021,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CL_RETURN_ON_FAILURE(clCreateSubDevices(hDevice->get(), CLProperties.data(), CLNumDevicesRet, CLSubDevices.data(), nullptr)); - - std::memcpy(phSubDevices, CLSubDevices.data(), - sizeof(cl_device_id) * NumDevices); + for (uint32_t i = 0; i < NumDevices; i++) { + phSubDevices[i] = new ur_device_handle_t_(CLSubDevices[i], hDevice->Platform, hDevice); + } } return UR_RESULT_SUCCESS; @@ -1053,7 +1055,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( ur_native_handle_t hNativeDevice, ur_platform_handle_t hPlatform, const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { cl_device_id NativeHandle = reinterpret_cast(hNativeDevice); - *phDevice = new 
ur_device_handle_t_(NativeHandle, hPlatform); + *phDevice = new ur_device_handle_t_(NativeHandle, hPlatform, nullptr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 5d30bf72c9..cedff1a82e 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -23,9 +23,17 @@ struct ur_device_handle_t_ { using native_type = cl_device_id; native_type Device; ur_platform_handle_t Platform; + cl_device_type Type = 0; + ur_device_handle_t ParentDevice = nullptr; - ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat) - : Device(Dev), Platform(Plat) {} + ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat, ur_device_handle_t Parent) + : Device(Dev), Platform(Plat), ParentDevice(Parent) { + if (Parent) { + Type = Parent->Type; + } else { + clGetDeviceInfo(Device, CL_DEVICE_TYPE, sizeof(cl_device_type), &Type, nullptr); + } + } ~ur_device_handle_t_() {} diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index e715783ac3..86027d5beb 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -87,10 +87,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - std::vector CLPlatforms(NumEntries); - cl_int Result = clGetPlatformIDs(cl_adapter::cast(NumEntries), - CLPlatforms.data(), - cl_adapter::cast(pNumPlatforms)); + static std::vector URPlatforms; + static std::once_flag InitFlag; + static uint32_t NumPlatforms = 0; + cl_int Result = CL_SUCCESS; + + std::call_once( + InitFlag, + [](cl_int &Result) { + Result = clGetPlatformIDs(0, nullptr, &NumPlatforms); + if (Result != CL_SUCCESS) { + return Result; + } + std::vector CLPlatforms(NumPlatforms); + Result = clGetPlatformIDs(cl_adapter::cast(NumPlatforms), + CLPlatforms.data(), + nullptr); + if (Result != CL_SUCCESS) { + return 
Result; + } + URPlatforms.resize(NumPlatforms); + for (uint32_t i = 0; i < NumPlatforms; i++) { + URPlatforms[i] = new ur_platform_handle_t_(CLPlatforms[i]); + } + return Result; + }, + Result); + /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ if (Result == CL_PLATFORM_NOT_FOUND_KHR) { Result = CL_SUCCESS; @@ -98,9 +121,12 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, *pNumPlatforms = 0; } } + if (pNumPlatforms != nullptr) { + *pNumPlatforms = NumPlatforms; + } if (NumEntries && phPlatforms) { for (uint32_t i = 0; i < NumEntries; i++) { - phPlatforms[i] = new ur_platform_handle_t_(CLPlatforms[i]); + phPlatforms[i] = URPlatforms[i]; } } return mapCLErrorToUR(Result); diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 75f7577856..ebdcf4d134 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -179,6 +179,7 @@ struct ur_platform_handle_t_ { ur_platform_handle_t_(native_type Plat) : Platform(Plat) { ExtFuncPtr = std::make_unique(); + InitDevices(); } ~ur_platform_handle_t_() { ExtFuncPtr.reset(); } @@ -199,16 +200,16 @@ struct ur_platform_handle_t_ { native_type get() { return Platform; } - ur_result_t GetDevices(cl_device_type Type) { + ur_result_t InitDevices() { cl_uint DeviceNum = 0; - CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, Type, 0, nullptr, &DeviceNum)); + CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &DeviceNum)); std::vector CLDevices(DeviceNum); - CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, Type, DeviceNum, CLDevices.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, DeviceNum, CLDevices.data(), nullptr)); Devices = std::vector(DeviceNum); for (size_t i = 0; i < DeviceNum; i++) { - Devices[i] = new ur_device_handle_t_(CLDevices[i], this); + Devices[i] = new ur_device_handle_t_(CLDevices[i], this, nullptr); } return UR_RESULT_SUCCESS; From 
d22d6e0509f141674a787339135210a1cf7e0447 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 11 Dec 2023 13:53:03 +0000 Subject: [PATCH 05/19] Add program handle --- source/adapters/opencl/enqueue.cpp | 9 ++- source/adapters/opencl/kernel.cpp | 3 +- source/adapters/opencl/program.cpp | 121 ++++++++++++++--------------- source/adapters/opencl/program.hpp | 26 +++++++ 4 files changed, 91 insertions(+), 68 deletions(-) create mode 100644 source/adapters/opencl/program.hpp diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 6830a28eec..420a070653 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include "program.hpp" cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { cl_map_flags CLFlags = 0; @@ -345,7 +346,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( return UR_RESULT_ERROR_INVALID_OPERATION; Res = F(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), name, blockingWrite, count, + hProgram->get(), name, blockingWrite, count, offset, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent)); @@ -376,7 +377,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( return UR_RESULT_ERROR_INVALID_OPERATION; Res = F(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), name, blockingRead, count, + hProgram->get(), name, blockingRead, count, offset, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent)); @@ -407,7 +408,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( if (FuncPtr) { RetVal = mapCLErrorToUR( FuncPtr(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), pipe_symbol, blocking, + hProgram->get(), pipe_symbol, blocking, pDst, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); @@ 
-439,7 +440,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( if (FuncPtr) { RetVal = mapCLErrorToUR( FuncPtr(cl_adapter::cast(hQueue), - cl_adapter::cast(hProgram), pipe_symbol, blocking, + hProgram->get(), pipe_symbol, blocking, pSrc, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index adf87279d0..71a728a506 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "common.hpp" #include "device.hpp" +#include "program.hpp" #include #include @@ -19,7 +20,7 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, cl_int CLResult; *phKernel = cl_adapter::cast(clCreateKernel( - cl_adapter::cast(hProgram), pKernelName, &CLResult)); + hProgram->get(), pKernelName, &CLResult)); CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index bbfa630abc..6c34abbb82 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -12,26 +12,20 @@ #include "context.hpp" #include "device.hpp" #include "platform.hpp" +#include "program.hpp" static ur_result_t getDevicesFromProgram( ur_program_handle_t hProgram, std::unique_ptr> &DevicesInProgram) { - cl_uint DeviceCount; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), - &DeviceCount, nullptr)); - - if (DeviceCount < 1) { - return UR_RESULT_ERROR_INVALID_CONTEXT; + if (!hProgram->Context || !hProgram->Context->DeviceCount) { + return UR_RESULT_ERROR_INVALID_PROGRAM; } - + cl_uint DeviceCount = hProgram->Context->DeviceCount; DevicesInProgram = std::make_unique>(DeviceCount); - - CL_RETURN_ON_FAILURE(clGetProgramInfo( - cl_adapter::cast(hProgram), 
CL_PROGRAM_DEVICES, - DeviceCount * sizeof(cl_device_id), (*DevicesInProgram).data(), nullptr)); - + for (uint32_t i = 0; i < DeviceCount; i++) { + (*DevicesInProgram)[i] = hProgram->Context->Devices[i]->get(); + } return UR_RESULT_SUCCESS; } @@ -39,15 +33,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( ur_context_handle_t hContext, const void *pIL, size_t length, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - std::unique_ptr> DevicesInCtx; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getDevicesFromContext(hContext, DevicesInCtx), phProgram); - - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, nullptr), - phProgram); + if (!hContext->DeviceCount || !hContext->Devices[0]->Platform) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + cl_platform_id CurPlatform = hContext->Devices[0]->Platform->get(); oclv::OpenCLVersion PlatVer; CL_RETURN_ON_FAILURE_AND_SET_NULL( @@ -57,7 +46,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( if (PlatVer >= oclv::V2_1) { /* Make sure all devices support CL 2.1 or newer as well. */ - for (cl_device_id Dev : *DevicesInCtx) { + for (ur_device_handle_t URDev : hContext->Devices) { + cl_device_id Dev = URDev->get(); oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE_AND_SET_NULL( @@ -79,15 +69,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } } - *phProgram = cl_adapter::cast(clCreateProgramWithIL( - hContext->get(), pIL, length, &Err)); + cl_program Program = clCreateProgramWithIL(hContext->get(), pIL, length, &Err); CL_RETURN_ON_FAILURE(Err); + + *phProgram = new ur_program_handle_t_(Program, hContext); } else { /* If none of the devices conform with CL 2.1 or newer make sure they all * support the cl_khr_il_program extension. 
*/ - for (cl_device_id Dev : *DevicesInCtx) { + for (ur_device_handle_t URDev : hContext->Devices) { + cl_device_id Dev = URDev->get(); bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, @@ -106,9 +98,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( CurPlatform, "clCreateProgramWithILKHR")); assert(FuncPtr != nullptr); + cl_program Program = FuncPtr(hContext->get(), pIL, length, &Err); + *phProgram = new ur_program_handle_t_(Program, hContext); - *phProgram = cl_adapter::cast( - FuncPtr(hContext->get(), pIL, length, &Err)); CL_RETURN_ON_FAILURE(Err); } @@ -124,9 +116,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( const size_t Lengths[1] = {size}; cl_int BinaryStatus[1]; cl_int CLResult; - *phProgram = cl_adapter::cast(clCreateProgramWithBinary( + cl_program Program = clCreateProgramWithBinary( hContext->get(), cl_adapter::cast(1u), - Devices, Lengths, &pBinary, BinaryStatus, &CLResult)); + Devices, Lengths, &pBinary, BinaryStatus, &CLResult); + *phProgram = new ur_program_handle_t_(Program, hContext); CL_RETURN_ON_FAILURE(BinaryStatus[0]); CL_RETURN_ON_FAILURE(CLResult); @@ -140,7 +133,7 @@ urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, std::unique_ptr> DevicesInProgram; CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), + CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->get(), DevicesInProgram->size(), DevicesInProgram->data(), pOptions, 0, nullptr, nullptr, nullptr, nullptr)); @@ -178,7 +171,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { size_t CheckPropSize = 0; - auto ClResult = clGetProgramInfo(cl_adapter::cast(hProgram), + auto ClResult = clGetProgramInfo(hProgram->get(), mapURProgramInfoToCL(propName), propSize, pPropValue, 
&CheckPropSize); if (pPropValue && CheckPropSize != propSize) { @@ -199,7 +192,7 @@ urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); CL_RETURN_ON_FAILURE(clBuildProgram( - cl_adapter::cast(hProgram), DevicesInProgram->size(), + hProgram->get(), DevicesInProgram->size(), DevicesInProgram->data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -210,11 +203,16 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, ur_program_handle_t *phProgram) { cl_int CLResult; - *phProgram = cl_adapter::cast( + std::vector CLPrograms(count); + for (uint32_t i = 0; i < count; i++) { + CLPrograms[i] = phPrograms[i]->get(); + } + cl_program Program = clLinkProgram(hContext->get(), 0, nullptr, pOptions, cl_adapter::cast(count), - cl_adapter::cast(phPrograms), nullptr, - nullptr, &CLResult)); + CLPrograms.data(), nullptr, + nullptr, &CLResult); + *phProgram = new ur_program_handle_t_(Program, hContext); CL_RETURN_ON_FAILURE(CLResult); return UR_RESULT_SUCCESS; @@ -287,14 +285,14 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( - cl_adapter::cast(hProgram), hDevice->get(), + hProgram->get(), hDevice->get(), mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), &BinaryType, nullptr)); return ReturnValue(mapCLBinaryTypeToUR(BinaryType)); } size_t CheckPropSize = 0; cl_int ClErr = - clGetProgramBuildInfo(cl_adapter::cast(hProgram), + clGetProgramBuildInfo(hProgram->get(), hDevice->get(), mapURProgramBuildInfoToCL(propName), propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { @@ -311,7 +309,7 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain(ur_program_handle_t hProgram) { - 
CL_RETURN_ON_FAILURE(clRetainProgram(cl_adapter::cast(hProgram))); + CL_RETURN_ON_FAILURE(clRetainProgram(hProgram->get())); return UR_RESULT_SUCCESS; } @@ -319,22 +317,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { CL_RETURN_ON_FAILURE( - clReleaseProgram(cl_adapter::cast(hProgram))); + clReleaseProgram(hProgram->get())); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { - *phNativeProgram = reinterpret_cast(hProgram); + *phNativeProgram = reinterpret_cast(hProgram->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( - ur_native_handle_t hNativeProgram, ur_context_handle_t, + ur_native_handle_t hNativeProgram, ur_context_handle_t hContext, const ur_program_native_properties_t *pProperties, ur_program_handle_t *phProgram) { - *phProgram = reinterpret_cast(hNativeProgram); + cl_program NativeHandle = + reinterpret_cast(hNativeProgram); + *phProgram = new ur_program_handle_t_(NativeHandle, hContext); if (!pProperties || !pProperties->isNativeHandleOwned) { return urProgramRetain(*phProgram); } @@ -345,21 +345,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( ur_program_handle_t hProgram, uint32_t count, const ur_specialization_constant_info_t *pSpecConstants) { - cl_program CLProg = cl_adapter::cast(hProgram); - cl_context Ctx = nullptr; - size_t RetSize = 0; - - CL_RETURN_ON_FAILURE(clGetProgramInfo(CLProg, CL_PROGRAM_CONTEXT, sizeof(Ctx), - &Ctx, &RetSize)); + cl_program CLProg = hProgram->get(); + if (!hProgram->Context) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + ur_context_handle_t Ctx = hProgram->Context; + if (!Ctx->DeviceCount || !Ctx->Devices[0]->Platform) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } std::unique_ptr> DevicesInCtx; - UR_RETURN_ON_FAILURE(cl_adapter::getDevicesFromContext( - cl_adapter::cast(Ctx), 
DevicesInCtx)); + UR_RETURN_ON_FAILURE(cl_adapter::getDevicesFromContext(Ctx, DevicesInCtx)); - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE(clGetDeviceInfo((*DevicesInCtx)[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, - nullptr)); + cl_platform_id CurPlatform = Ctx->Devices[0]->Platform->get(); oclv::OpenCLVersion PlatVer; cl_adapter::getPlatformVersion(CurPlatform, PlatVer); @@ -391,7 +389,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( SetProgramSpecializationConstant = nullptr; const ur_result_t URResult = cl_ext::getExtFuncFromContext< decltype(SetProgramSpecializationConstant)>( - Ctx, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, + Ctx->get(), cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, cl_ext::SetProgramSpecializationConstantName, &SetProgramSpecializationConstant); @@ -438,10 +436,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ur_device_handle_t hDevice, ur_program_handle_t hProgram, const char *pFunctionName, void **ppFunctionPointer) { - cl_context CLContext = nullptr; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - CL_PROGRAM_CONTEXT, sizeof(CLContext), - &CLContext, nullptr)); + cl_context CLContext = hProgram->Context->get(); cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; @@ -461,14 +456,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( // throws exceptions. *ppFunctionPointer = 0; size_t Size; - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), + CL_RETURN_ON_FAILURE(clGetProgramInfo(hProgram->get(), CL_PROGRAM_KERNEL_NAMES, 0, nullptr, &Size)); std::string KernelNames(Size, ' '); CL_RETURN_ON_FAILURE(clGetProgramInfo( - cl_adapter::cast(hProgram), CL_PROGRAM_KERNEL_NAMES, + hProgram->get(), CL_PROGRAM_KERNEL_NAMES, KernelNames.size(), &KernelNames[0], nullptr)); // Get rid of the null terminator and search for the kernel name. 
If the @@ -479,7 +474,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( } const cl_int CLResult = - FuncT(hDevice->get(), cl_adapter::cast(hProgram), + FuncT(hDevice->get(), hProgram->get(), pFunctionName, reinterpret_cast(ppFunctionPointer)); // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address // cannot be found but the kernel exists. As the kernel does exist, return diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp new file mode 100644 index 0000000000..d8e8197331 --- /dev/null +++ b/source/adapters/opencl/program.hpp @@ -0,0 +1,26 @@ +//===--------- program.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_program_handle_t_ { + using native_type = cl_program; + native_type Program; + ur_context_handle_t Context; + + ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx) : Program(Prog), Context(Ctx) {} + + ~ur_program_handle_t_() {} + + native_type get() { return Program; } +}; From f463e8027f8127d5606ef4c6ec576115b13c2029 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 11 Dec 2023 14:05:10 +0000 Subject: [PATCH 06/19] Formating --- source/adapters/opencl/context.cpp | 18 ++++---- source/adapters/opencl/context.hpp | 22 +++++---- source/adapters/opencl/device.cpp | 12 ++--- source/adapters/opencl/device.hpp | 8 ++-- source/adapters/opencl/enqueue.cpp | 20 ++++---- source/adapters/opencl/kernel.cpp | 4 +- source/adapters/opencl/memory.cpp | 12 ++--- source/adapters/opencl/platform.cpp | 43 +++++++++-------- source/adapters/opencl/platform.hpp | 8 ++-- source/adapters/opencl/program.cpp | 72 
+++++++++++++---------------- source/adapters/opencl/program.hpp | 13 +++--- source/adapters/opencl/queue.cpp | 11 ++--- source/adapters/opencl/sampler.cpp | 5 +- source/adapters/opencl/usm.cpp | 15 +++--- 14 files changed, 127 insertions(+), 136 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index a28e766e3d..4264b438f0 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -42,9 +42,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( CLDevices[i] = phDevices[i]->get(); } - cl_context Ctx = clCreateContext(nullptr, cl_adapter::cast(DeviceCount), - CLDevices.data(), - nullptr, nullptr, cl_adapter::cast(&Ret)); + cl_context Ctx = clCreateContext( + nullptr, cl_adapter::cast(DeviceCount), CLDevices.data(), + nullptr, nullptr, cl_adapter::cast(&Ret)); *phContext = new ur_context_handle_t_(Ctx, DeviceCount, phDevices); return mapCLErrorToUR(Ret); @@ -95,9 +95,8 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_DEVICES: case UR_CONTEXT_INFO_REFERENCE_COUNT: { size_t CheckPropSize = 0; - auto ClResult = - clGetContextInfo(hContext->get(), CLPropName, - propSize, pPropValue, &CheckPropSize); + auto ClResult = clGetContextInfo(hContext->get(), CLPropName, propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -137,8 +136,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( ur_native_handle_t hNativeContext, uint32_t numDevices, const ur_device_handle_t *phDevices, const ur_context_native_properties_t *pProperties, ur_context_handle_t *phContext) { - cl_context NativeHandle = - reinterpret_cast(hNativeContext); + cl_context NativeHandle = reinterpret_cast(hNativeContext); *phContext = new ur_context_handle_t_(NativeHandle, numDevices, phDevices); if (!pProperties || !pProperties->isNativeHandleOwned) { return clRetainContext(NativeHandle); 
@@ -192,8 +190,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( auto *C = static_cast(pUserData); C->execute(); }; - CL_RETURN_ON_FAILURE(clSetContextDestructorCallback( - hContext->get(), ClCallback, Callback)); + CL_RETURN_ON_FAILURE( + clSetContextDestructorCallback(hContext->get(), ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 7156372a87..46cbd2167f 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -21,18 +21,20 @@ getDevicesFromContext(ur_context_handle_t hContext, } struct ur_context_handle_t_ { - using native_type = cl_context; - native_type Context; - std::vector Devices; - uint32_t DeviceCount; + using native_type = cl_context; + native_type Context; + std::vector Devices; + uint32_t DeviceCount; - ur_context_handle_t_(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices) : Context(Ctx), DeviceCount(DevCount) { - for (uint32_t i = 0; i < DeviceCount; i++) { - Devices.emplace_back(phDevices[i]); - } + ur_context_handle_t_(native_type Ctx, uint32_t DevCount, + const ur_device_handle_t *phDevices) + : Context(Ctx), DeviceCount(DevCount) { + for (uint32_t i = 0; i < DeviceCount; i++) { + Devices.emplace_back(phDevices[i]); } + } - ~ur_context_handle_t_() {} + ~ur_context_handle_t_() {} - native_type get() { return Context; } + native_type get() { return Context; } }; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 7969fe8ab3..6ad0d129b1 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -52,11 +52,10 @@ ur_result_t cl_adapter::checkDeviceExtensions( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, - ur_device_type_t DeviceType, - [[maybe_unused]] uint32_t NumEntries, - ur_device_handle_t *phDevices, - uint32_t *pNumDevices) { +UR_APIEXPORT 
ur_result_t UR_APICALL +urDeviceGet(ur_platform_handle_t hPlatform, ur_device_type_t DeviceType, + [[maybe_unused]] uint32_t NumEntries, ur_device_handle_t *phDevices, + uint32_t *pNumDevices) { cl_device_type Type; switch (DeviceType) { @@ -1022,7 +1021,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CLNumDevicesRet, CLSubDevices.data(), nullptr)); for (uint32_t i = 0; i < NumDevices; i++) { - phSubDevices[i] = new ur_device_handle_t_(CLSubDevices[i], hDevice->Platform, hDevice); + phSubDevices[i] = + new ur_device_handle_t_(CLSubDevices[i], hDevice->Platform, hDevice); } } diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index cedff1a82e..dc29a1692e 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -24,14 +24,16 @@ struct ur_device_handle_t_ { native_type Device; ur_platform_handle_t Platform; cl_device_type Type = 0; - ur_device_handle_t ParentDevice = nullptr; + ur_device_handle_t ParentDevice = nullptr; - ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat, ur_device_handle_t Parent) + ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat, + ur_device_handle_t Parent) : Device(Dev), Platform(Plat), ParentDevice(Parent) { if (Parent) { Type = Parent->Type; } else { - clGetDeviceInfo(Device, CL_DEVICE_TYPE, sizeof(cl_device_type), &Type, nullptr); + clGetDeviceInfo(Device, CL_DEVICE_TYPE, sizeof(cl_device_type), &Type, + nullptr); } } diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 420a070653..5c56cd2381 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -345,9 +345,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( if (!F || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; - Res = F(cl_adapter::cast(hQueue), - hProgram->get(), name, blockingWrite, count, - offset, pSrc, numEventsInWaitList, + Res = F(cl_adapter::cast(hQueue), 
hProgram->get(), name, + blockingWrite, count, offset, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent)); @@ -376,9 +375,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( if (!F || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; - Res = F(cl_adapter::cast(hQueue), - hProgram->get(), name, blockingRead, count, - offset, pDst, numEventsInWaitList, + Res = F(cl_adapter::cast(hQueue), hProgram->get(), name, + blockingRead, count, offset, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent)); @@ -407,9 +405,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( if (FuncPtr) { RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), - hProgram->get(), pipe_symbol, blocking, - pDst, size, numEventsInWaitList, + FuncPtr(cl_adapter::cast(hQueue), hProgram->get(), + pipe_symbol, blocking, pDst, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); } @@ -439,9 +436,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( if (FuncPtr) { RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), - hProgram->get(), pipe_symbol, blocking, - pSrc, size, numEventsInWaitList, + FuncPtr(cl_adapter::cast(hQueue), hProgram->get(), + pipe_symbol, blocking, pSrc, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 71a728a506..a589ffd70f 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -19,8 +19,8 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { cl_int CLResult; - *phKernel = cl_adapter::cast(clCreateKernel( - hProgram->get(), pKernelName, &CLResult)); + *phKernel = cl_adapter::cast( + clCreateKernel(hProgram->get(), pKernelName, &CLResult)); CL_RETURN_ON_FAILURE(CLResult); return 
UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index ae712e3315..13c314de02 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -270,9 +270,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( } void *HostPtr = pProperties ? pProperties->pHost : nullptr; - *phBuffer = reinterpret_cast(clCreateBuffer( - hContext->get(), static_cast(flags), - size, HostPtr, cl_adapter::cast(&RetErr))); + *phBuffer = reinterpret_cast( + clCreateBuffer(hContext->get(), static_cast(flags), size, + HostPtr, cl_adapter::cast(&RetErr))); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; @@ -289,9 +289,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( cl_image_desc ImageDesc = mapURImageDescToCL(pImageDesc); cl_map_flags MapFlags = convertURMemFlagsToCL(flags); - *phMem = reinterpret_cast(clCreateImage( - hContext->get(), MapFlags, &ImageFormat, - &ImageDesc, pHost, cl_adapter::cast(&RetErr))); + *phMem = reinterpret_cast( + clCreateImage(hContext->get(), MapFlags, &ImageFormat, &ImageDesc, pHost, + cl_adapter::cast(&RetErr))); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 86027d5beb..2526c14a51 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -66,9 +66,9 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, if (hPlatform) { Plat = hPlatform->get(); } - CL_RETURN_ON_FAILURE(clGetPlatformInfo(Plat, CLPropName, - propSize, pPropValue, pSizeRet)); - + CL_RETURN_ON_FAILURE( + clGetPlatformInfo(Plat, CLPropName, propSize, pPropValue, pSizeRet)); + return UR_RESULT_SUCCESS; } default: @@ -93,26 +93,25 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, cl_int Result = CL_SUCCESS; std::call_once( - InitFlag, - [](cl_int &Result) { - Result = clGetPlatformIDs(0, nullptr, &NumPlatforms); - if 
(Result != CL_SUCCESS) { - return Result; - } - std::vector CLPlatforms(NumPlatforms); - Result = clGetPlatformIDs(cl_adapter::cast(NumPlatforms), - CLPlatforms.data(), - nullptr); - if (Result != CL_SUCCESS) { + InitFlag, + [](cl_int &Result) { + Result = clGetPlatformIDs(0, nullptr, &NumPlatforms); + if (Result != CL_SUCCESS) { + return Result; + } + std::vector CLPlatforms(NumPlatforms); + Result = clGetPlatformIDs(cl_adapter::cast(NumPlatforms), + CLPlatforms.data(), nullptr); + if (Result != CL_SUCCESS) { + return Result; + } + URPlatforms.resize(NumPlatforms); + for (uint32_t i = 0; i < NumPlatforms; i++) { + URPlatforms[i] = new ur_platform_handle_t_(CLPlatforms[i]); + } return Result; - } - URPlatforms.resize(NumPlatforms); - for (uint32_t i = 0; i < NumPlatforms; i++) { - URPlatforms[i] = new ur_platform_handle_t_(CLPlatforms[i]); - } - return Result; - }, - Result); + }, + Result); /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ if (Result == CL_PLATFORM_NOT_FOUND_KHR) { diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index ebdcf4d134..667b998970 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -202,14 +202,16 @@ struct ur_platform_handle_t_ { ur_result_t InitDevices() { cl_uint DeviceNum = 0; - CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &DeviceNum)); + CL_RETURN_ON_FAILURE( + clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &DeviceNum)); std::vector CLDevices(DeviceNum); - CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, DeviceNum, CLDevices.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, DeviceNum, + CLDevices.data(), nullptr)); Devices = std::vector(DeviceNum); for (size_t i = 0; i < DeviceNum; i++) { - Devices[i] = new ur_device_handle_t_(CLDevices[i], this, nullptr); + Devices[i] = new ur_device_handle_t_(CLDevices[i], this, nullptr); } 
return UR_RESULT_SUCCESS; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 6c34abbb82..03d1d2a680 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -8,11 +8,11 @@ // //===----------------------------------------------------------------------===// +#include "program.hpp" #include "common.hpp" #include "context.hpp" #include "device.hpp" #include "platform.hpp" -#include "program.hpp" static ur_result_t getDevicesFromProgram( ur_program_handle_t hProgram, @@ -69,7 +69,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } } - cl_program Program = clCreateProgramWithIL(hContext->get(), pIL, length, &Err); + cl_program Program = + clCreateProgramWithIL(hContext->get(), pIL, length, &Err); CL_RETURN_ON_FAILURE(Err); *phProgram = new ur_program_handle_t_(Program, hContext); @@ -117,8 +118,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( cl_int BinaryStatus[1]; cl_int CLResult; cl_program Program = clCreateProgramWithBinary( - hContext->get(), cl_adapter::cast(1u), - Devices, Lengths, &pBinary, BinaryStatus, &CLResult); + hContext->get(), cl_adapter::cast(1u), Devices, Lengths, + &pBinary, BinaryStatus, &CLResult); *phProgram = new ur_program_handle_t_(Program, hContext); CL_RETURN_ON_FAILURE(BinaryStatus[0]); CL_RETURN_ON_FAILURE(CLResult); @@ -133,10 +134,9 @@ urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, std::unique_ptr> DevicesInProgram; CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->get(), - DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, 0, - nullptr, nullptr, nullptr, nullptr)); + CL_RETURN_ON_FAILURE(clCompileProgram( + hProgram->get(), DevicesInProgram->size(), DevicesInProgram->data(), + pOptions, 0, nullptr, nullptr, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -171,9 +171,9 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { size_t CheckPropSize = 0; - auto ClResult = clGetProgramInfo(hProgram->get(), - mapURProgramInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); + auto ClResult = + clGetProgramInfo(hProgram->get(), mapURProgramInfoToCL(propName), + propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -191,9 +191,9 @@ urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, std::unique_ptr> DevicesInProgram; CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - CL_RETURN_ON_FAILURE(clBuildProgram( - hProgram->get(), DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, nullptr, nullptr)); + CL_RETURN_ON_FAILURE(clBuildProgram(hProgram->get(), DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, + nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -207,11 +207,9 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, for (uint32_t i = 0; i < count; i++) { CLPrograms[i] = phPrograms[i]->get(); } - cl_program Program = - clLinkProgram(hContext->get(), 0, nullptr, - pOptions, cl_adapter::cast(count), - CLPrograms.data(), nullptr, - nullptr, &CLResult); + cl_program Program = clLinkProgram( + hContext->get(), 0, nullptr, pOptions, cl_adapter::cast(count), + CLPrograms.data(), nullptr, nullptr, &CLResult); *phProgram = new ur_program_handle_t_(Program, hContext); CL_RETURN_ON_FAILURE(CLResult); @@ -285,16 +283,14 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( - hProgram->get(), hDevice->get(), - mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), - &BinaryType, nullptr)); + hProgram->get(), hDevice->get(), 
mapURProgramBuildInfoToCL(propName), + sizeof(cl_program_binary_type), &BinaryType, nullptr)); return ReturnValue(mapCLBinaryTypeToUR(BinaryType)); } size_t CheckPropSize = 0; - cl_int ClErr = - clGetProgramBuildInfo(hProgram->get(), - hDevice->get(), mapURProgramBuildInfoToCL(propName), - propSize, pPropValue, &CheckPropSize); + cl_int ClErr = clGetProgramBuildInfo(hProgram->get(), hDevice->get(), + mapURProgramBuildInfoToCL(propName), + propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -316,8 +312,7 @@ urProgramRetain(ur_program_handle_t hProgram) { UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { - CL_RETURN_ON_FAILURE( - clReleaseProgram(hProgram->get())); + CL_RETURN_ON_FAILURE(clReleaseProgram(hProgram->get())); return UR_RESULT_SUCCESS; } @@ -332,8 +327,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( ur_native_handle_t hNativeProgram, ur_context_handle_t hContext, const ur_program_native_properties_t *pProperties, ur_program_handle_t *phProgram) { - cl_program NativeHandle = - reinterpret_cast(hNativeProgram); + cl_program NativeHandle = reinterpret_cast(hNativeProgram); *phProgram = new ur_program_handle_t_(NativeHandle, hContext); if (!pProperties || !pProperties->isNativeHandleOwned) { return urProgramRetain(*phProgram); @@ -389,7 +383,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( SetProgramSpecializationConstant = nullptr; const ur_result_t URResult = cl_ext::getExtFuncFromContext< decltype(SetProgramSpecializationConstant)>( - Ctx->get(), cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, + Ctx->get(), + cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, cl_ext::SetProgramSpecializationConstantName, &SetProgramSpecializationConstant); @@ -456,15 +451,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( // throws exceptions. 
*ppFunctionPointer = 0; size_t Size; - CL_RETURN_ON_FAILURE(clGetProgramInfo(hProgram->get(), - CL_PROGRAM_KERNEL_NAMES, 0, nullptr, - &Size)); + CL_RETURN_ON_FAILURE(clGetProgramInfo( + hProgram->get(), CL_PROGRAM_KERNEL_NAMES, 0, nullptr, &Size)); std::string KernelNames(Size, ' '); - CL_RETURN_ON_FAILURE(clGetProgramInfo( - hProgram->get(), CL_PROGRAM_KERNEL_NAMES, - KernelNames.size(), &KernelNames[0], nullptr)); + CL_RETURN_ON_FAILURE( + clGetProgramInfo(hProgram->get(), CL_PROGRAM_KERNEL_NAMES, + KernelNames.size(), &KernelNames[0], nullptr)); // Get rid of the null terminator and search for the kernel name. If the // function cannot be found, return an error code to indicate it exists. @@ -474,8 +468,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( } const cl_int CLResult = - FuncT(hDevice->get(), hProgram->get(), - pFunctionName, reinterpret_cast(ppFunctionPointer)); + FuncT(hDevice->get(), hProgram->get(), pFunctionName, + reinterpret_cast(ppFunctionPointer)); // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address // cannot be found but the kernel exists. As the kernel does exist, return // that the function name is invalid. 
diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index d8e8197331..84f486b7e8 100644 --- a/source/adapters/opencl/program.hpp +++ b/source/adapters/opencl/program.hpp @@ -14,13 +14,14 @@ #include struct ur_program_handle_t_ { - using native_type = cl_program; - native_type Program; - ur_context_handle_t Context; + using native_type = cl_program; + native_type Program; + ur_context_handle_t Context; - ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx) : Program(Prog), Context(Ctx) {} + ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx) + : Program(Prog), Context(Ctx) {} - ~ur_program_handle_t_() {} + ~ur_program_handle_t_() {} - native_type get() { return Program; } + native_type get() { return Program; } }; diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 276c179f95..60218fd01f 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -7,9 +7,9 @@ //===-----------------------------------------------------------------===// #include "common.hpp" +#include "context.hpp" #include "device.hpp" #include "platform.hpp" -#include "context.hpp" cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { @@ -94,9 +94,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_int RetErr = CL_INVALID_OPERATION; if (Version < oclv::V2_0) { - *phQueue = cl_adapter::cast(clCreateCommandQueue( - hContext->get(), hDevice->get(), - CLProperties & SupportByOpenCL, &RetErr)); + *phQueue = cl_adapter::cast( + clCreateCommandQueue(hContext->get(), hDevice->get(), + CLProperties & SupportByOpenCL, &RetErr)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -106,8 +106,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; *phQueue = cl_adapter::cast(clCreateCommandQueueWithProperties( - hContext->get(), hDevice->get(), - CreationFlagProperties, &RetErr)); + hContext->get(), 
hDevice->get(), CreationFlagProperties, &RetErr)); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 187b87a7f1..20920541fc 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -145,9 +145,8 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, // Always call OpenCL 1.0 API *phSampler = cl_adapter::cast(clCreateSampler( - hContext->get(), - static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, - cl_adapter::cast(&ErrorCode))); + hContext->get(), static_cast(pDesc->normalizedCoords), + AddressingMode, FilterMode, cl_adapter::cast(&ErrorCode))); return mapCLErrorToUR(ErrorCode); } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 935c099b2c..9a1a5b9d2b 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "common.hpp" -#include "device.hpp" #include "context.hpp" +#include "device.hpp" inline cl_mem_alloc_flags_intel hostDescToClFlags(const ur_usm_host_desc_t &desc) { @@ -149,8 +149,8 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, hDevice->get(), - AllocProperties.empty() ? nullptr : AllocProperties.data(), size, - Alignment, &ClResult); + AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -192,8 +192,8 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, hDevice->get(), - AllocProperties.empty() ? nullptr : AllocProperties.data(), size, - Alignment, cl_adapter::cast(&ClResult)); + AllocProperties.empty() ? 
nullptr : AllocProperties.data(), + size, Alignment, cl_adapter::cast(&ClResult)); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } @@ -551,9 +551,8 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, } size_t CheckPropSize = 0; - cl_int ClErr = - GetMemAllocInfo(hContext->get(), pMem, PropNameCL, - propSize, pPropValue, &CheckPropSize); + cl_int ClErr = GetMemAllocInfo(hContext->get(), pMem, PropNameCL, propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } From 338bc38a56fd665ba90f76c0cdd477384060070f Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 11 Dec 2023 15:50:25 +0000 Subject: [PATCH 07/19] Add memory handle --- source/adapters/opencl/command_buffer.cpp | 13 ++-- source/adapters/opencl/context.cpp | 8 ++- source/adapters/opencl/enqueue.cpp | 84 ++++++++++------------- source/adapters/opencl/memory.cpp | 76 +++++++++++--------- source/adapters/opencl/memory.hpp | 27 ++++++++ source/adapters/opencl/program.cpp | 37 +++++++--- 6 files changed, 147 insertions(+), 98 deletions(-) create mode 100644 source/adapters/opencl/memory.hpp diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 97c91e4672..a1b95fc9be 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -11,6 +11,7 @@ #include "command_buffer.hpp" #include "common.hpp" #include "context.hpp" +#include "memory.hpp" UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, @@ -166,8 +167,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( return UR_RESULT_ERROR_INVALID_OPERATION; CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, - cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), 
hDstMem->get(), srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); @@ -203,8 +203,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( return UR_RESULT_ERROR_INVALID_OPERATION; CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( - hCommandBuffer->CLCommandBuffer, nullptr, - cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(), OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); @@ -292,9 +291,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( return UR_RESULT_ERROR_INVALID_OPERATION; CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, - cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, hBuffer->get(), pPattern, + patternSize, offset, size, numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 4264b438f0..b49314ee7c 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -91,8 +91,12 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, * queries of each device separately and building the intersection set. 
*/ return UR_RESULT_ERROR_INVALID_ARGUMENT; } - case UR_CONTEXT_INFO_NUM_DEVICES: - case UR_CONTEXT_INFO_DEVICES: + case UR_CONTEXT_INFO_NUM_DEVICES: { + return ReturnValue(hContext->DeviceCount); + } + case UR_CONTEXT_INFO_DEVICES: { + return ReturnValue(hContext->Devices); + } case UR_CONTEXT_INFO_REFERENCE_COUNT: { size_t CheckPropSize = 0; auto ClResult = clGetContextInfo(hContext->get(), CLPropName, propSize, diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 5c56cd2381..e0a5484f5a 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include "memory.hpp" #include "program.hpp" cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { @@ -72,9 +73,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(hQueue), hBuffer->get(), blockingRead, + offset, size, pDst, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); return UR_RESULT_SUCCESS; @@ -86,9 +87,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(hQueue), hBuffer->get(), blockingWrite, + offset, size, pSrc, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); return UR_RESULT_SUCCESS; @@ -107,10 +108,9 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urEnqueueMemBufferReadRect( const size_t Region[3] = {region.width, region.height, region.depth}; CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingRead, BufferOrigin, HostOrigin, - Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, - pDst, numEventsInWaitList, + cl_adapter::cast(hQueue), hBuffer->get(), blockingRead, + BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, + hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); @@ -130,10 +130,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( const size_t Region[3] = {region.width, region.height, region.depth}; CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingWrite, BufferOrigin, - HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, - hostSlicePitch, pSrc, numEventsInWaitList, + cl_adapter::cast(hQueue), hBuffer->get(), blockingWrite, + BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, + hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); @@ -147,10 +146,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( ur_event_handle_t *phEvent) { CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBufferSrc), - cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(hQueue), hBufferSrc->get(), + hBufferDst->get(), srcOffset, dstOffset, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); return UR_RESULT_SUCCESS; @@ -168,11 +166,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( const size_t Region[3] = {region.width, region.height, region.depth}; CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( - 
cl_adapter::cast(hQueue), - cl_adapter::cast(hBufferSrc), - cl_adapter::cast(hBufferDst), SrcOrigin, DstOrigin, Region, - srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), + cl_adapter::cast(hQueue), hBufferSrc->get(), + hBufferDst->get(), SrcOrigin, DstOrigin, Region, srcRowPitch, + srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); return UR_RESULT_SUCCESS; @@ -186,12 +183,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( // CL FillBuffer only allows pattern sizes up to the largest CL type: // long16/double16 if (patternSize <= 128) { - CL_RETURN_ON_FAILURE( - clEnqueueFillBuffer(cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, - patternSize, offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( + cl_adapter::cast(hQueue), hBuffer->get(), pPattern, + patternSize, offset, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); return UR_RESULT_SUCCESS; } @@ -204,10 +200,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( cl_event WriteEvent = nullptr; auto ClErr = clEnqueueWriteBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), false, offset, size, HostBuffer, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - &WriteEvent); + cl_adapter::cast(hQueue), hBuffer->get(), false, offset, + size, HostBuffer, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; CL_RETURN_ON_FAILURE(ClErr); @@ -245,9 +240,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( const size_t Region[3] = {region.width, region.height, region.depth}; CL_RETURN_ON_FAILURE(clEnqueueReadImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImage), blockingRead, Origin, Region, rowPitch, 
- slicePitch, pDst, numEventsInWaitList, + cl_adapter::cast(hQueue), hImage->get(), blockingRead, + Origin, Region, rowPitch, slicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); @@ -263,9 +257,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( const size_t Region[3] = {region.width, region.height, region.depth}; CL_RETURN_ON_FAILURE(clEnqueueWriteImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImage), blockingWrite, Origin, Region, rowPitch, - slicePitch, pSrc, numEventsInWaitList, + cl_adapter::cast(hQueue), hImage->get(), blockingWrite, + Origin, Region, rowPitch, slicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); @@ -283,9 +276,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( const size_t Region[3] = {region.width, region.height, region.depth}; CL_RETURN_ON_FAILURE(clEnqueueCopyImage( - cl_adapter::cast(hQueue), - cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), - SrcOrigin, DstOrigin, Region, numEventsInWaitList, + cl_adapter::cast(hQueue), hImageSrc->get(), + hImageDst->get(), SrcOrigin, DstOrigin, Region, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); @@ -300,8 +292,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( cl_int Err; *ppRetMap = clEnqueueMapBuffer( - cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), blockingMap, + cl_adapter::cast(hQueue), hBuffer->get(), blockingMap, convertURMapFlagsToCL(mapFlags), offset, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent), &Err); @@ -315,9 +306,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_event_handle_t *phEvent) { CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject( - cl_adapter::cast(hQueue), - cl_adapter::cast(hMem), pMappedPtr, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), + cl_adapter::cast(hQueue), hMem->get(), pMappedPtr, + numEventsInWaitList, 
cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); return UR_RESULT_SUCCESS; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 13c314de02..871626c50e 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "memory.hpp" #include "common.hpp" #include "context.hpp" @@ -262,18 +263,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( } PropertiesIntel.push_back(0); - *phBuffer = reinterpret_cast(FuncPtr( + cl_mem Buffer = FuncPtr( CLContext, PropertiesIntel.data(), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&RetErr))); + size, pProperties->pHost, cl_adapter::cast(&RetErr)); + *phBuffer = new ur_mem_handle_t_(Buffer, hContext); return mapCLErrorToUR(RetErr); } } void *HostPtr = pProperties ? pProperties->pHost : nullptr; - *phBuffer = reinterpret_cast( + cl_mem Buffer = clCreateBuffer(hContext->get(), static_cast(flags), size, - HostPtr, cl_adapter::cast(&RetErr))); + HostPtr, cl_adapter::cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); + *phBuffer = new ur_mem_handle_t_(Buffer, hContext); return UR_RESULT_SUCCESS; } @@ -289,10 +292,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( cl_image_desc ImageDesc = mapURImageDescToCL(pImageDesc); cl_map_flags MapFlags = convertURMemFlagsToCL(flags); - *phMem = reinterpret_cast( + cl_mem Mem = clCreateImage(hContext->get(), MapFlags, &ImageFormat, &ImageDesc, pHost, - cl_adapter::cast(&RetErr))); + cl_adapter::cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); + *phMem = new ur_mem_handle_t_(Mem, hContext); return UR_RESULT_SUCCESS; } @@ -318,30 +322,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( BufferRegion.size = pRegion->size; *phMem = reinterpret_cast(clCreateSubBuffer( - cl_adapter::cast(hBuffer), static_cast(flags), - BufferCreateType, &BufferRegion, cl_adapter::cast(&RetErr))); + 
hBuffer->get(), static_cast(flags), BufferCreateType, + &BufferRegion, cl_adapter::cast(&RetErr))); if (RetErr == CL_INVALID_VALUE) { size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hBuffer), - CL_MEM_SIZE, sizeof(BufferSize), - &BufferSize, nullptr)); + CL_RETURN_ON_FAILURE(clGetMemObjectInfo( + hBuffer->get(), CL_MEM_SIZE, sizeof(BufferSize), &BufferSize, nullptr)); if (BufferRegion.size + BufferRegion.origin > BufferSize) return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; } return mapCLErrorToUR(RetErr); } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( - ur_mem_handle_t hMem, ur_device_handle_t, ur_native_handle_t *phNativeMem) { - return getNativeHandle(hMem, phNativeMem); +UR_APIEXPORT ur_result_t UR_APICALL +urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { + return getNativeHandle(hMem->get(), phNativeMem); } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( - ur_native_handle_t hNativeMem, - [[maybe_unused]] ur_context_handle_t hContext, + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - *phMem = reinterpret_cast(hNativeMem); + cl_mem NativeHandle = reinterpret_cast(hNativeMem); + *phMem = new ur_mem_handle_t_(NativeHandle, hContext); if (!pProperties || !pProperties->isNativeHandleOwned) { return urMemRetain(*phMem); } @@ -349,12 +352,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( - ur_native_handle_t hNativeMem, - [[maybe_unused]] ur_context_handle_t hContext, + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, [[maybe_unused]] const ur_image_format_t *pImageFormat, [[maybe_unused]] const ur_image_desc_t *pImageDesc, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { - *phMem = reinterpret_cast(hNativeMem); + cl_mem NativeHandle = 
reinterpret_cast(hNativeMem); + *phMem = new ur_mem_handle_t_(NativeHandle, hContext); if (!pProperties || !pProperties->isNativeHandleOwned) { return urMemRetain(*phMem); } @@ -370,17 +373,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemInfoToCL(propName); - size_t CheckPropSize = 0; - auto ClResult = - clGetMemObjectInfo(cl_adapter::cast(hMemory), CLPropName, - propSize, pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + switch (static_cast(propName)) { + case UR_PROGRAM_INFO_CONTEXT: { + return ReturnValue(hMemory->Context); } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + default: { + size_t CheckPropSize = 0; + auto ClResult = clGetMemObjectInfo(hMemory->get(), CLPropName, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } } + } + return UR_RESULT_SUCCESS; } @@ -394,8 +404,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, const cl_int CLPropName = mapURMemImageInfoToCL(propName); size_t CheckPropSize = 0; - auto ClResult = clGetImageInfo(cl_adapter::cast(hMemory), CLPropName, - propSize, pPropValue, &CheckPropSize); + auto ClResult = clGetImageInfo(hMemory->get(), CLPropName, propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -407,11 +417,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, } UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { - CL_RETURN_ON_FAILURE(clRetainMemObject(cl_adapter::cast(hMem))); + CL_RETURN_ON_FAILURE(clRetainMemObject(hMem->get())); return UR_RESULT_SUCCESS; 
} UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { - CL_RETURN_ON_FAILURE(clReleaseMemObject(cl_adapter::cast(hMem))); + CL_RETURN_ON_FAILURE(clReleaseMemObject(hMem->get())); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp new file mode 100644 index 0000000000..48d5cfd895 --- /dev/null +++ b/source/adapters/opencl/memory.hpp @@ -0,0 +1,27 @@ +//===--------- memory.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_mem_handle_t_ { + using native_type = cl_mem; + native_type Memory; + ur_context_handle_t Context; + + ur_mem_handle_t_(native_type Mem, ur_context_handle_t Ctx) + : Memory(Mem), Context(Ctx) {} + + ~ur_mem_handle_t_() {} + + native_type get() { return Memory; } +}; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 03d1d2a680..d5efdcc273 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -170,17 +170,36 @@ static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - size_t CheckPropSize = 0; - auto ClResult = - clGetProgramInfo(hProgram->get(), mapURProgramInfoToCL(propName), - propSize, pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + const cl_program_info CLPropName = mapURProgramInfoToCL(propName); + + 
switch (static_cast(propName)) { + case UR_PROGRAM_INFO_CONTEXT: { + return ReturnValue(hProgram->Context); } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + case UR_PROGRAM_INFO_NUM_DEVICES: { + if (!hProgram->Context || !hProgram->Context->DeviceCount) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + cl_uint DeviceCount = hProgram->Context->DeviceCount; + return ReturnValue(DeviceCount); } + default: { + size_t CheckPropSize = 0; + auto ClResult = clGetProgramInfo(hProgram->get(), CLPropName, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + } + } + return UR_RESULT_SUCCESS; } From 6bb6d3786cd8468606e48e26f0893dbd8bf0f213 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Tue, 12 Dec 2023 17:15:00 +0000 Subject: [PATCH 08/19] Add queue and event handlers --- source/adapters/opencl/command_buffer.cpp | 24 +- source/adapters/opencl/context.cpp | 2 +- source/adapters/opencl/enqueue.cpp | 360 +++++++++++++--------- source/adapters/opencl/event.cpp | 47 +-- source/adapters/opencl/event.hpp | 29 ++ source/adapters/opencl/memory.cpp | 2 +- source/adapters/opencl/queue.cpp | 53 ++-- source/adapters/opencl/queue.hpp | 29 ++ source/adapters/opencl/usm.cpp | 160 +++++----- 9 files changed, 421 insertions(+), 285 deletions(-) create mode 100644 source/adapters/opencl/event.hpp create mode 100644 source/adapters/opencl/queue.hpp diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index a1b95fc9be..854d9c655c 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -11,7 +11,9 @@ #include "command_buffer.hpp" #include "common.hpp" #include "context.hpp" +#include "event.hpp" #include "memory.hpp" +#include "queue.hpp" UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, @@ -30,9 +32,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( if (!clCreateCommandBufferKHR || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; - - auto CLCommandBuffer = clCreateCommandBufferKHR( - 1, cl_adapter::cast(&Queue), nullptr, &Res); + cl_command_queue CLQueue = Queue->get(); + auto CLCommandBuffer = clCreateCommandBufferKHR(1, &CLQueue, nullptr, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); try { @@ -348,13 +349,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_ERROR_INVALID_OPERATION; const uint32_t NumberOfQueues = 1; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + cl_command_queue CLQueue = hQueue->get(); CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR( - NumberOfQueues, cl_adapter::cast(&hQueue), - hCommandBuffer->CLCommandBuffer, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index b49314ee7c..a066e7b978 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -95,7 +95,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, return ReturnValue(hContext->DeviceCount); } case UR_CONTEXT_INFO_DEVICES: { - return ReturnValue(hContext->Devices); + return ReturnValue(&hContext->Devices[0], hContext->DeviceCount); } case UR_CONTEXT_INFO_REFERENCE_COUNT: { size_t CheckPropSize = 0; diff --git a/source/adapters/opencl/enqueue.cpp 
b/source/adapters/opencl/enqueue.cpp index e0a5484f5a..0da75663a5 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -9,8 +9,11 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include "context.hpp" +#include "event.hpp" #include "memory.hpp" #include "program.hpp" +#include "queue.hpp" cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { cl_map_flags CLFlags = 0; @@ -32,38 +35,50 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( - cl_adapter::cast(hQueue), - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), cl_adapter::cast(hKernel), workDim, + pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + 
hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -71,13 +86,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( - cl_adapter::cast(hQueue), hBuffer->get(), blockingRead, - offset, size, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBuffer->get(), blockingRead, offset, size, pDst, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -85,13 +104,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, 
size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( - cl_adapter::cast(hQueue), hBuffer->get(), blockingWrite, - offset, size, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBuffer->get(), blockingWrite, offset, size, pSrc, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -106,14 +129,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( bufferOrigin.z}; const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( - cl_adapter::cast(hQueue), hBuffer->get(), blockingRead, - BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, - hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBuffer->get(), blockingRead, BufferOrigin, HostOrigin, + Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, + pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -128,14 +155,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( bufferOrigin.z}; const size_t HostOrigin[3] = {hostOrigin.x, hostOrigin.y, hostOrigin.z}; const 
size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( - cl_adapter::cast(hQueue), hBuffer->get(), blockingWrite, - BufferOrigin, HostOrigin, Region, bufferRowPitch, bufferSlicePitch, - hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBuffer->get(), blockingWrite, BufferOrigin, HostOrigin, + Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, + pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -144,13 +175,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( - cl_adapter::cast(hQueue), hBufferSrc->get(), - hBufferDst->get(), srcOffset, dstOffset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBufferSrc->get(), hBufferDst->get(), srcOffset, dstOffset, + size, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -164,14 +199,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( const size_t SrcOrigin[3] = {srcOrigin.x, srcOrigin.y, srcOrigin.z}; const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, 
dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( - cl_adapter::cast(hQueue), hBufferSrc->get(), - hBufferDst->get(), SrcOrigin, DstOrigin, Region, srcRowPitch, - srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hBufferSrc->get(), hBufferDst->get(), SrcOrigin, DstOrigin, + Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -183,11 +222,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( // CL FillBuffer only allows pattern sizes up to the largest CL type: // long16/double16 if (patternSize <= 128) { + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( - cl_adapter::cast(hQueue), hBuffer->get(), pPattern, - patternSize, offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + hQueue->get(), hBuffer->get(), pPattern, patternSize, offset, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -199,10 +244,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( } cl_event WriteEvent = nullptr; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } auto ClErr = clEnqueueWriteBuffer( - 
cl_adapter::cast(hQueue), hBuffer->get(), false, offset, - size, HostBuffer, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), &WriteEvent); + hQueue->get(), hBuffer->get(), false, offset, size, HostBuffer, + numEventsInWaitList, CLWaitEvents.data(), &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; CL_RETURN_ON_FAILURE(ClErr); @@ -223,7 +271,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( } if (phEvent) { - *phEvent = cl_adapter::cast(WriteEvent); + *phEvent = new ur_event_handle_t_(WriteEvent, hQueue->Context, hQueue); } else { CL_RETURN_ON_FAILURE(clReleaseEvent(WriteEvent)); } @@ -238,13 +286,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueReadImage( - cl_adapter::cast(hQueue), hImage->get(), blockingRead, - Origin, Region, rowPitch, slicePitch, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hImage->get(), blockingRead, Origin, Region, rowPitch, + slicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -255,13 +307,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { const size_t Origin[3] = {origin.x, origin.y, origin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] 
= phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueWriteImage( - cl_adapter::cast(hQueue), hImage->get(), blockingWrite, - Origin, Region, rowPitch, slicePitch, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hImage->get(), blockingWrite, Origin, Region, rowPitch, + slicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -274,13 +330,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( const size_t SrcOrigin[3] = {srcOrigin.x, srcOrigin.y, srcOrigin.z}; const size_t DstOrigin[3] = {dstOrigin.x, dstOrigin.y, dstOrigin.z}; const size_t Region[3] = {region.width, region.height, region.depth}; - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } CL_RETURN_ON_FAILURE(clEnqueueCopyImage( - cl_adapter::cast(hQueue), hImageSrc->get(), - hImageDst->get(), SrcOrigin, DstOrigin, Region, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + hQueue->get(), hImageSrc->get(), hImageDst->get(), SrcOrigin, DstOrigin, + Region, numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -289,14 +349,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, void **ppRetMap) { - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } cl_int Err; - *ppRetMap = clEnqueueMapBuffer( - cl_adapter::cast(hQueue), hBuffer->get(), 
blockingMap, - convertURMapFlagsToCL(mapFlags), offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent), &Err); - + *ppRetMap = clEnqueueMapBuffer(hQueue->get(), hBuffer->get(), blockingMap, + convertURMapFlagsToCL(mapFlags), offset, size, + numEventsInWaitList, CLWaitEvents.data(), + &Event, &Err); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return mapCLErrorToUR(Err); } @@ -304,12 +369,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - - CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject( - cl_adapter::cast(hQueue), hMem->get(), pMappedPtr, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject(hQueue->get(), hMem->get(), + pMappedPtr, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -319,27 +389,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return mapCLErrorToUR(Res); + cl_context Ctx = hQueue->Context->get(); cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; - Res = cl_ext::getExtFuncFromContext( + cl_int Res = cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, 
cl_ext::EnqueueWriteGlobalVariableName, &F); if (!F || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; - - Res = F(cl_adapter::cast(hQueue), hProgram->get(), name, - blockingWrite, count, offset, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + Res = F(hQueue->get(), hProgram->get(), name, blockingWrite, count, offset, + pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return mapCLErrorToUR(Res); } @@ -349,27 +417,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = nullptr; - cl_int Res = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(Ctx), &Ctx, nullptr); - - if (Res != CL_SUCCESS) - return mapCLErrorToUR(Res); + cl_context Ctx = hQueue->Context->get(); cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; - Res = cl_ext::getExtFuncFromContext( + cl_int Res = cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, cl_ext::EnqueueReadGlobalVariableName, &F); if (!F || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; - - Res = F(cl_adapter::cast(hQueue), hProgram->get(), name, - blockingRead, count, offset, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + Res = F(hQueue->get(), hProgram->get(), name, blockingRead, count, offset, + pDst, numEventsInWaitList, CLWaitEvents.data(), &Event); + if (phEvent) 
{ + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return mapCLErrorToUR(Res); } @@ -379,13 +445,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } + cl_context CLContext = hQueue->Context->get(); cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = @@ -394,11 +454,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( cl_ext::EnqueueReadHostPipeName, &FuncPtr); if (FuncPtr) { - RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), hProgram->get(), - pipe_symbol, blocking, pDst, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + RetVal = mapCLErrorToUR(FuncPtr(hQueue->get(), hProgram->get(), pipe_symbol, + blocking, pDst, size, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } } return RetVal; @@ -410,13 +476,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } + cl_context CLContext = hQueue->Context->get(); cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = @@ -425,11 +485,17 @@ UR_APIEXPORT ur_result_t 
UR_APICALL urEnqueueWriteHostPipe( cl_ext::EnqueueWriteHostPipeName, &FuncPtr); if (FuncPtr) { - RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), hProgram->get(), - pipe_symbol, blocking, pSrc, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + RetVal = mapCLErrorToUR(FuncPtr(hQueue->get(), hProgram->get(), pipe_symbol, + blocking, pSrc, size, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } } return RetVal; diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index d180cfb097..5e1d2fabe4 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "event.hpp" #include "common.hpp" #include @@ -109,12 +110,12 @@ ur_command_t convertCLCommandTypeToUR(const cl_command_type &CommandType) { } } -UR_APIEXPORT ur_result_t UR_APICALL -urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, - [[maybe_unused]] ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { - *phEvent = reinterpret_cast(hNativeEvent); +UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( + ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { + cl_event NativeHandle = reinterpret_cast(hNativeEvent); + *phEvent = new ur_event_handle_t_(NativeHandle, hContext, nullptr); if (!pProperties || !pProperties->isNativeHandleOwned) { return urEventRetain(*phEvent); } @@ -123,25 +124,28 @@ urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, UR_APIEXPORT 
ur_result_t UR_APICALL urEventGetNativeHandle( ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { - return getNativeHandle(hEvent, phNativeEvent); + return getNativeHandle(hEvent->get(), phNativeEvent); } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { - cl_int RetErr = clReleaseEvent(cl_adapter::cast(hEvent)); + cl_int RetErr = clReleaseEvent(hEvent->get()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - cl_int RetErr = clRetainEvent(cl_adapter::cast(hEvent)); + cl_int RetErr = clRetainEvent(hEvent->get()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { - cl_int RetErr = clWaitForEvents( - numEvents, cl_adapter::cast(phEventWaitList)); + std::vector CLEvents(numEvents); + for (uint32_t i = 0; i < numEvents; i++) { + CLEvents[i] = phEventWaitList[i]->get(); + } + cl_int RetErr = clWaitForEvents(numEvents, CLEvents.data()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -152,11 +156,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, void *pPropValue, size_t *pPropSizeRet) { cl_event_info CLEventInfo = convertUREventInfoToCL(propName); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + if (CLEventInfo == CL_EVENT_CONTEXT) { + return ReturnValue(hEvent->Context); + } + if (CLEventInfo == CL_EVENT_COMMAND_QUEUE) { + return ReturnValue(hEvent->Queue); + } size_t CheckPropSize = 0; - cl_int RetErr = - clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, - pPropValue, &CheckPropSize); + cl_int RetErr = clGetEventInfo(hEvent->get(), CLEventInfo, propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -192,9 +202,8 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { cl_profiling_info CLProfilingInfo = convertURProfilingInfoToCL(propName); - cl_int RetErr = clGetEventProfilingInfo(cl_adapter::cast(hEvent), - CLProfilingInfo, propSize, pPropValue, - pPropSizeRet); + cl_int RetErr = clGetEventProfilingInfo(hEvent->get(), CLProfilingInfo, + propSize, pPropValue, pPropSizeRet); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -259,7 +268,7 @@ urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, auto *C = static_cast(pUserData); C->execute(); }; - CL_RETURN_ON_FAILURE(clSetEventCallback(cl_adapter::cast(hEvent), - CallbackType, ClCallback, Callback)); + CL_RETURN_ON_FAILURE( + clSetEventCallback(hEvent->get(), CallbackType, ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/event.hpp b/source/adapters/opencl/event.hpp new file mode 100644 index 0000000000..7574e218a2 --- /dev/null +++ b/source/adapters/opencl/event.hpp @@ -0,0 +1,29 @@ +//===--------- event.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_event_handle_t_ { + using native_type = cl_event; + native_type Event; + ur_context_handle_t Context; + ur_queue_handle_t Queue; + + ur_event_handle_t_(native_type Event, ur_context_handle_t Ctx, + ur_queue_handle_t Queue) + : Event(Event), Context(Ctx), Queue(Queue) {} + + ~ur_event_handle_t_() {} + + native_type get() { return Event; } +}; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 871626c50e..68bfa0761d 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -374,7 +374,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, const cl_int CLPropName = mapURMemInfoToCL(propName); switch (static_cast(propName)) { - case UR_PROGRAM_INFO_CONTEXT: { + case UR_MEM_INFO_CONTEXT: { return ReturnValue(hMemory->Context); } default: { diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 60218fd01f..41ff096acc 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -6,6 +6,7 @@ // //===-----------------------------------------------------------------===// +#include "queue.hpp" #include "common.hpp" #include "context.hpp" #include "device.hpp" @@ -73,11 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { - cl_platform_id CurPlatform; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - clGetDeviceInfo(hDevice->get(), CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, nullptr), - phQueue); + cl_platform_id CurPlatform = hDevice->Platform->get(); cl_command_queue_properties CLProperties = pProperties ? 
convertURQueuePropertiesToCL(pProperties) : 0; @@ -94,20 +91,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_int RetErr = CL_INVALID_OPERATION; if (Version < oclv::V2_0) { - *phQueue = cl_adapter::cast( + cl_command_queue Queue = clCreateCommandQueue(hContext->get(), hDevice->get(), - CLProperties & SupportByOpenCL, &RetErr)); + CLProperties & SupportByOpenCL, &RetErr); CL_RETURN_ON_FAILURE(RetErr); + *phQueue = new ur_queue_handle_t_(Queue, hContext, hDevice); return UR_RESULT_SUCCESS; } /* TODO: Add support for CL_QUEUE_PRIORITY_KHR */ cl_queue_properties CreationFlagProperties[] = { CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; - *phQueue = - cl_adapter::cast(clCreateCommandQueueWithProperties( - hContext->get(), hDevice->get(), CreationFlagProperties, &RetErr)); + cl_command_queue Queue = clCreateCommandQueueWithProperties( + hContext->get(), hDevice->get(), CreationFlagProperties, &RetErr); CL_RETURN_ON_FAILURE(RetErr); + *phQueue = new ur_queue_handle_t_(Queue, hContext, hDevice); return UR_RESULT_SUCCESS; } @@ -121,23 +119,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + if (propName == UR_QUEUE_INFO_CONTEXT) { + return ReturnValue(hQueue->Context); + } + if (propName == UR_QUEUE_INFO_DEVICE) { + return ReturnValue(hQueue->Device); + } // Unfortunately the size of cl_bitfield (unsigned long) doesn't line up with // our enums (forced to be sizeof(uint32_t)) so this needs special handling. 
if (propName == UR_QUEUE_INFO_FLAGS) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - cl_command_queue_properties QueueProperties = 0; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CLCommandQueueInfo, - sizeof(QueueProperties), &QueueProperties, nullptr)); + hQueue->get(), CLCommandQueueInfo, sizeof(QueueProperties), + &QueueProperties, nullptr)); return ReturnValue(mapCLQueuePropsToUR(QueueProperties)); } else { size_t CheckPropSize = 0; - cl_int RetErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CLCommandQueueInfo, - propSize, pPropValue, &CheckPropSize); + cl_int RetErr = clGetCommandQueueInfo(hQueue->get(), CLCommandQueueInfo, + propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -153,7 +155,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, ur_native_handle_t *phNativeQueue) { - return getNativeHandle(hQueue, phNativeQueue); + return getNativeHandle(hQueue->get(), phNativeQueue); } UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( @@ -163,7 +165,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( [[maybe_unused]] const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { - *phQueue = reinterpret_cast(hNativeQueue); + cl_command_queue NativeHandle = + reinterpret_cast(hNativeQueue); + *phQueue = new ur_queue_handle_t_(NativeHandle, hContext, hDevice); + cl_int RetErr = clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); CL_RETURN_ON_FAILURE(RetErr); @@ -171,27 +176,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + cl_int RetErr = clFinish(hQueue->get()); 
CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { - cl_int RetErr = clFinish(cl_adapter::cast(hQueue)); + cl_int RetErr = clFinish(hQueue->get()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - cl_int RetErr = - clRetainCommandQueue(cl_adapter::cast(hQueue)); + cl_int RetErr = clRetainCommandQueue(hQueue->get()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - cl_int RetErr = - clReleaseCommandQueue(cl_adapter::cast(hQueue)); + cl_int RetErr = clReleaseCommandQueue(hQueue->get()); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp new file mode 100644 index 0000000000..cdeb14af37 --- /dev/null +++ b/source/adapters/opencl/queue.hpp @@ -0,0 +1,29 @@ +//===--------- queue.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_queue_handle_t_ { + using native_type = cl_command_queue; + native_type Queue; + ur_context_handle_t Context; + ur_device_handle_t Device; + + ur_queue_handle_t_(native_type Queue, ur_context_handle_t Ctx, + ur_device_handle_t Dev) + : Queue(Queue), Context(Ctx), Device(Dev) {} + + ~ur_queue_handle_t_() {} + + native_type get() { return Queue; } +}; diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 9a1a5b9d2b..684004e126 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -11,6 +11,8 @@ #include "common.hpp" #include "context.hpp" #include "device.hpp" +#include "event.hpp" +#include "queue.hpp" inline cl_mem_alloc_flags_intel hostDescToClFlags(const ur_usm_host_desc_t &desc) { @@ -82,7 +84,7 @@ usmDescToCLMemProperties(const ur_base_desc_t *Desc, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, +urUSMHostAlloc(ur_context_handle_t Context, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { void *Ptr = nullptr; @@ -96,7 +98,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, // First we need to look up the function pointer clHostMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = hContext->get(); + cl_context CLContext = Context->get(); if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &FuncPtr)) { @@ -124,7 +126,7 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, +urUSMDeviceAlloc(ur_context_handle_t Context, 
ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -139,7 +141,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clDeviceMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = hContext->get(); + cl_context CLContext = Context->get(); if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, cl_ext::DeviceMemAllocName, &FuncPtr)) { @@ -167,7 +169,7 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL -urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, +urUSMSharedAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { @@ -182,7 +184,7 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clSharedMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = hContext->get(); + cl_context CLContext = Context->get(); if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, cl_ext::SharedMemAllocName, &FuncPtr)) { @@ -208,14 +210,14 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, +UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t Context, void *pMem) { // Use a blocking free to avoid issues with indirect access from kernels that // might be still running. 
clMemBlockingFreeINTEL_fn FuncPtr = nullptr; - cl_context CLContext = hContext->get(); + cl_context CLContext = Context->get(); ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, @@ -233,13 +235,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } + cl_context CLContext = hQueue->Context->get(); if (patternSize <= 128) { clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; @@ -247,12 +243,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, cl_ext::EnqueueMemFillName, &EnqueueMemFill)); - - CL_RETURN_ON_FAILURE( - EnqueueMemFill(cl_adapter::cast(hQueue), ptr, - pPattern, patternSize, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(EnqueueMemFill(hQueue->get(), ptr, pPattern, + patternSize, size, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } return UR_RESULT_SUCCESS; } @@ -287,9 +288,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( cl_event CopyEvent = nullptr; CL_RETURN_ON_FAILURE(USMMemcpy( - cl_adapter::cast(hQueue), false, ptr, HostBuffer, size, - numEventsInWaitList, cl_adapter::cast(phEventWaitList), - 
&CopyEvent)); + hQueue->get(), false, ptr, HostBuffer, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), &CopyEvent)); struct DeleteCallbackInfo { DeleteCallbackInfo(clMemBlockingFreeINTEL_fn USMFree, cl_context CLContext, @@ -326,7 +326,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( CL_RETURN_ON_FAILURE(ClErr); } if (phEvent) { - *phEvent = cl_adapter::cast(CopyEvent); + *phEvent = new ur_event_handle_t_(CopyEvent, hQueue->Context, hQueue); } else { CL_RETURN_ON_FAILURE(clReleaseEvent(CopyEvent)); } @@ -340,13 +340,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return mapCLErrorToUR(CLErr); - } + cl_context CLContext = hQueue->Context->get(); clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( @@ -354,11 +348,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( cl_ext::EnqueueMemcpyName, &FuncPtr); if (FuncPtr) { - RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), blocking, pDst, - pSrc, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + RetVal = mapCLErrorToUR(FuncPtr(hQueue->get(), blocking, pDst, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), + &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } } return RetVal; @@ -370,23 +370,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( [[maybe_unused]] ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t 
*phEventWaitList, ur_event_handle_t *phEvent) { - - return mapCLErrorToUR(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( + hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } + return UR_RESULT_SUCCESS; /* // Use this once impls support it. // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, sizeof(cl_context), - &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); - } + cl_context CLContext = hQueue->Context; clEnqueueMigrateMemINTEL_fn FuncPtr; ur_result_t Err = cl_ext::getExtFuncFromContext( @@ -397,7 +395,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( RetVal = Err; } else { RetVal = map_cl_error_to_ur( - FuncPtr(cl_adapter::cast(hQueue), pMem, size, flags, + FuncPtr(hQueue->get(), pMem, size, flags, numEventsInWaitList, reinterpret_cast(phEventWaitList), reinterpret_cast(phEvent))); @@ -409,23 +407,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, [[maybe_unused]] size_t size, [[maybe_unused]] ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { - - return mapCLErrorToUR(clEnqueueMarkerWithWaitList( - cl_adapter::cast(hQueue), 0, nullptr, - reinterpret_cast(phEvent))); - + cl_event Event; + CL_RETURN_ON_FAILURE( + clEnqueueMarkerWithWaitList(hQueue->get(), 0, nullptr, &Event)); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } + return UR_RESULT_SUCCESS; /* // Change 
to use this once drivers support it. // Have to look up the context from the kernel - cl_context CLContext; - cl_int CLErr = - clGetCommandQueueInfo(cl_adapter::cast(hQueue), - CL_QUEUE_CONTEXT, - sizeof(cl_context), - &CLContext, nullptr); - if (CLErr != CL_SUCCESS) { - return map_cl_error_to_ur(CLErr); - } + cl_context CLContext = hQueue->Context; clEnqueueMemAdviseINTEL_fn FuncPtr; ur_result_t Err = @@ -437,7 +429,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( RetVal = Err; } else { RetVal = - map_cl_error_to_ur(FuncPtr(cl_adapter::cast(hQueue), pMem, + map_cl_error_to_ur(FuncPtr(hQueue->get(), pMem, size, advice, 0, nullptr, reinterpret_cast(phEvent))); } */ @@ -459,10 +451,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const void *pSrc, size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, - sizeof(cl_context), &CLContext, nullptr)); + cl_context CLContext = hQueue->Context->get(); clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( @@ -476,12 +465,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( std::vector Events(height); for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { cl_event Event = nullptr; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } auto ClResult = - FuncPtr(cl_adapter::cast(hQueue), false, + FuncPtr(hQueue->get(), false, static_cast(pDst) + dstPitch * HeightIndex, static_cast(pSrc) + srcPitch * HeightIndex, - width, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), &Event); + width, numEventsInWaitList, CLWaitEvents.data(), &Event); Events[HeightIndex] = Event; if (ClResult != CL_SUCCESS) { for (const auto &E : Events) { @@ 
-495,9 +487,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ClResult = clWaitForEvents(Events.size(), Events.data()); } if (phEvent && ClResult == CL_SUCCESS) { - ClResult = clEnqueueBarrierWithWaitList( - cl_adapter::cast(hQueue), Events.size(), - Events.data(), cl_adapter::cast(phEvent)); + cl_event Event; + ClResult = clEnqueueBarrierWithWaitList(hQueue->get(), Events.size(), + Events.data(), &Event); + if (phEvent) { + *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + } } for (const auto &E : Events) { CL_RETURN_ON_FAILURE(clReleaseEvent(E)); @@ -521,13 +516,12 @@ mapCLUSMTypeToUR(const cl_unified_shared_memory_type_intel &Type) { } } -UR_APIEXPORT ur_result_t UR_APICALL -urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, - ur_usm_alloc_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( + ur_context_handle_t Context, const void *pMem, ur_usm_alloc_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; - cl_context CLContext = hContext->get(); + cl_context CLContext = Context->get(); UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); @@ -551,7 +545,7 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, } size_t CheckPropSize = 0; - cl_int ClErr = GetMemAllocInfo(hContext->get(), pMem, PropNameCL, propSize, + cl_int ClErr = GetMemAllocInfo(Context->get(), pMem, PropNameCL, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; From f96109b97bbd24422af525a024fd69070e98010f Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Wed, 13 Dec 2023 14:40:06 +0000 Subject: [PATCH 09/19] Add sampler handle --- source/adapters/opencl/kernel.cpp | 16 ++++++++----- 
source/adapters/opencl/program.cpp | 2 +- source/adapters/opencl/sampler.cpp | 36 +++++++++++++++--------------- source/adapters/opencl/sampler.hpp | 27 ++++++++++++++++++++++ 4 files changed, 56 insertions(+), 25 deletions(-) create mode 100644 source/adapters/opencl/sampler.hpp diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index a589ffd70f..8c7434b1ad 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -9,7 +9,9 @@ //===----------------------------------------------------------------------===// #include "common.hpp" #include "device.hpp" +#include "memory.hpp" #include "program.hpp" +#include "sampler.hpp" #include #include @@ -391,9 +393,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { - cl_int RetErr = clSetKernelArg( - cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), - sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + cl_mem CLArgValue = hArgValue->get(); + cl_int RetErr = clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + sizeof(hArgValue), &CLArgValue); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -402,9 +405,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { - cl_int RetErr = clSetKernelArg( - cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), - sizeof(hArgValue), cl_adapter::cast(&hArgValue)); + cl_sampler CLArgSampler = hArgValue->get(); + cl_int RetErr = clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + sizeof(hArgValue), &CLArgSampler); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index d5efdcc273..bd1cf9fca6 100644 --- 
a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -229,8 +229,8 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, cl_program Program = clLinkProgram( hContext->get(), 0, nullptr, pOptions, cl_adapter::cast(count), CLPrograms.data(), nullptr, nullptr, &CLResult); - *phProgram = new ur_program_handle_t_(Program, hContext); CL_RETURN_ON_FAILURE(CLResult); + *phProgram = new ur_program_handle_t_(Program, hContext); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 20920541fc..688f3b1354 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "sampler.hpp" #include "common.hpp" #include "context.hpp" @@ -144,10 +145,10 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, cl_filter_mode FilterMode = ur2CLFilterMode(pDesc->filterMode); // Always call OpenCL 1.0 API - *phSampler = cl_adapter::cast(clCreateSampler( + cl_sampler Sampler = clCreateSampler( hContext->get(), static_cast(pDesc->normalizedCoords), - AddressingMode, FilterMode, cl_adapter::cast(&ErrorCode))); - + AddressingMode, FilterMode, cl_adapter::cast(&ErrorCode)); + *phSampler = new ur_sampler_handle_t_(Sampler, hContext); return mapCLErrorToUR(ErrorCode); } @@ -158,10 +159,13 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, static_assert(sizeof(cl_addressing_mode) == sizeof(ur_sampler_addressing_mode_t)); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + if (SamplerInfo == CL_SAMPLER_CONTEXT) { + return ReturnValue(hSampler->Context); + } size_t CheckPropSize = 0; - ur_result_t Err = mapCLErrorToUR( - clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, - propSize, pPropValue, &CheckPropSize)); + ur_result_t Err = mapCLErrorToUR(clGetSamplerInfo( + hSampler->get(), SamplerInfo, propSize, 
pPropValue, &CheckPropSize)); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -178,31 +182,27 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { - return mapCLErrorToUR( - clRetainSampler(cl_adapter::cast(hSampler))); + return mapCLErrorToUR(clRetainSampler(hSampler->get())); } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { - return mapCLErrorToUR( - clReleaseSampler(cl_adapter::cast(hSampler))); + return mapCLErrorToUR(clReleaseSampler(hSampler->get())); } UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { - *phNativeSampler = reinterpret_cast( - cl_adapter::cast(hSampler)); + *phNativeSampler = reinterpret_cast(hSampler->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( - ur_native_handle_t hNativeSampler, ur_context_handle_t, - const ur_sampler_native_properties_t *pProperties, - ur_sampler_handle_t *phSampler) { - *phSampler = reinterpret_cast( - cl_adapter::cast(hNativeSampler)); + ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, + const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { + cl_sampler NativeHandle = reinterpret_cast(hNativeSampler); + *phSampler = new ur_sampler_handle_t_(NativeHandle, hContext); if (!pProperties || !pProperties->isNativeHandleOwned) { - return urSamplerRetain(*phSampler); + return clRetainSampler(NativeHandle); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.hpp b/source/adapters/opencl/sampler.hpp new file mode 100644 index 0000000000..8bee3c50e4 --- /dev/null +++ b/source/adapters/opencl/sampler.hpp @@ -0,0 +1,27 @@ +//===--------- sampler.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation 
+// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" + +#include + +struct ur_sampler_handle_t_ { + using native_type = cl_sampler; + native_type Sampler; + ur_context_handle_t Context; + + ur_sampler_handle_t_(native_type Sampler, ur_context_handle_t Ctx) + : Sampler(Sampler), Context(Ctx) {} + + ~ur_sampler_handle_t_() {} + + native_type get() { return Sampler; } +}; From 3f64cce8c8e946f6b2cb5eef9bfa3bcef3b5e65a Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Thu, 14 Dec 2023 10:44:10 +0000 Subject: [PATCH 10/19] Use unique_ptr instead of new to auto handle memory allocations --- source/adapters/opencl/adapter.cpp | 3 +- source/adapters/opencl/command_buffer.cpp | 4 +- source/adapters/opencl/context.cpp | 9 +- source/adapters/opencl/device.cpp | 12 +- source/adapters/opencl/enqueue.cpp | 80 ++++++--- source/adapters/opencl/event.cpp | 4 +- source/adapters/opencl/kernel.cpp | 11 +- source/adapters/opencl/memory.cpp | 22 ++- source/adapters/opencl/platform.cpp | 10 +- source/adapters/opencl/platform.hpp | 187 ++-------------------- source/adapters/opencl/program.cpp | 23 ++- source/adapters/opencl/queue.cpp | 11 +- source/adapters/opencl/sampler.cpp | 7 +- source/adapters/opencl/usm.cpp | 34 ++-- 14 files changed, 178 insertions(+), 239 deletions(-) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 763c6d532d..8ae1e77755 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -22,10 +22,10 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { std::lock_guard Lock{adapter.Mutex}; - // adapter.RefCount++; if (adapter.RefCount++ == 0) { cl_ext::ExtFuncPtrCache = 
std::make_unique(); } + *phAdapters = &adapter; } @@ -43,7 +43,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { std::lock_guard Lock{adapter.Mutex}; - // --adapter.RefCount; if (--adapter.RefCount == 0) { cl_ext::ExtFuncPtrCache.reset(); } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 854d9c655c..a6fd0f4391 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -359,7 +359,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index a066e7b978..8079c7dd5b 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -45,8 +45,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( cl_context Ctx = clCreateContext( nullptr, cl_adapter::cast(DeviceCount), CLDevices.data(), nullptr, nullptr, cl_adapter::cast(&Ret)); - - *phContext = new ur_context_handle_t_(Ctx, DeviceCount, phDevices); + auto URContext = + std::make_unique(Ctx, DeviceCount, phDevices); + *phContext = URContext.release(); return mapCLErrorToUR(Ret); } @@ -141,7 +142,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( const ur_context_native_properties_t *pProperties, ur_context_handle_t *phContext) { cl_context NativeHandle = reinterpret_cast(hNativeContext); - *phContext = new ur_context_handle_t_(NativeHandle, numDevices, phDevices); + auto URContext = std::make_unique( + NativeHandle, numDevices, phDevices); + 
*phContext = URContext.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return clRetainContext(NativeHandle); } diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 6ad0d129b1..d5336d8530 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -79,6 +79,7 @@ urDeviceGet(ur_platform_handle_t hPlatform, ur_device_type_t DeviceType, default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } + UR_RETURN_ON_FAILURE(hPlatform->InitDevices()); try { uint32_t AllDevicesNum = hPlatform->Devices.size(); uint32_t DeviceNumIter = 0; @@ -86,7 +87,7 @@ urDeviceGet(ur_platform_handle_t hPlatform, ur_device_type_t DeviceType, cl_device_type DeviceType = hPlatform->Devices[i]->Type; if (DeviceType == Type || Type == CL_DEVICE_TYPE_ALL) { if (phDevices) { - phDevices[DeviceNumIter] = hPlatform->Devices[i]; + phDevices[DeviceNumIter] = hPlatform->Devices[i].get(); } DeviceNumIter++; } @@ -1021,8 +1022,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CLNumDevicesRet, CLSubDevices.data(), nullptr)); for (uint32_t i = 0; i < NumDevices; i++) { - phSubDevices[i] = - new ur_device_handle_t_(CLSubDevices[i], hDevice->Platform, hDevice); + auto URSubDevice = std::make_unique( + CLSubDevices[i], hDevice->Platform, hDevice); + phSubDevices[i] = URSubDevice.release(); } } @@ -1055,7 +1057,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( ur_native_handle_t hNativeDevice, ur_platform_handle_t hPlatform, const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { cl_device_id NativeHandle = reinterpret_cast(hNativeDevice); - *phDevice = new ur_device_handle_t_(NativeHandle, hPlatform, nullptr); + auto URDevice = + std::make_unique(NativeHandle, hPlatform, nullptr); + *phDevice = URDevice.release(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 0da75663a5..13e952d1a4 100644 --- 
a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -45,7 +45,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -61,7 +63,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -77,7 +81,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -95,7 +101,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( hQueue->get(), hBuffer->get(), blockingRead, offset, size, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -113,7 +121,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( hQueue->get(), hBuffer->get(), blockingWrite, offset, size, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + 
auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -139,7 +149,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -165,7 +177,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -184,7 +198,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( hQueue->get(), hBufferSrc->get(), hBufferDst->get(), srcOffset, dstOffset, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -209,7 +225,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -231,7 +249,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( hQueue->get(), hBuffer->get(), pPattern, patternSize, offset, size, numEventsInWaitList, 
CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -271,7 +291,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( } if (phEvent) { - *phEvent = new ur_event_handle_t_(WriteEvent, hQueue->Context, hQueue); + auto UREvent = std::make_unique( + WriteEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } else { CL_RETURN_ON_FAILURE(clReleaseEvent(WriteEvent)); } @@ -295,7 +317,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( hQueue->get(), hImage->get(), blockingRead, Origin, Region, rowPitch, slicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -316,7 +340,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( hQueue->get(), hImage->get(), blockingWrite, Origin, Region, rowPitch, slicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -339,7 +365,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( hQueue->get(), hImageSrc->get(), hImageDst->get(), SrcOrigin, DstOrigin, Region, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -360,7 +388,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( numEventsInWaitList, 
CLWaitEvents.data(), &Event, &Err); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return mapCLErrorToUR(Err); } @@ -378,7 +408,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( pMappedPtr, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -406,7 +438,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( Res = F(hQueue->get(), hProgram->get(), name, blockingWrite, count, offset, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return mapCLErrorToUR(Res); } @@ -434,7 +468,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( Res = F(hQueue->get(), hProgram->get(), name, blockingRead, count, offset, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return mapCLErrorToUR(Res); } @@ -463,7 +499,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( blocking, pDst, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } } @@ -494,7 +532,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( blocking, pSrc, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { 
- *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } } diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 5e1d2fabe4..a41561f06c 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -115,7 +115,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( const ur_event_native_properties_t *pProperties, ur_event_handle_t *phEvent) { cl_event NativeHandle = reinterpret_cast(hNativeEvent); - *phEvent = new ur_event_handle_t_(NativeHandle, hContext, nullptr); + auto UREvent = + std::make_unique(NativeHandle, hContext, nullptr); + *phEvent = UREvent.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return urEventRetain(*phEvent); } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 8c7434b1ad..440c981030 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -393,11 +393,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { - cl_mem CLArgValue = hArgValue->get(); - cl_int RetErr = clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - sizeof(hArgValue), &CLArgValue); - CL_RETURN_ON_FAILURE(RetErr); + cl_mem CLArgValue = hArgValue ? 
hArgValue->get() : nullptr; + CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + cl_adapter::cast(argIndex), + sizeof(CLArgValue), &CLArgValue)); return UR_RESULT_SUCCESS; } @@ -408,7 +407,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( cl_sampler CLArgSampler = hArgValue->get(); cl_int RetErr = clSetKernelArg(cl_adapter::cast(hKernel), cl_adapter::cast(argIndex), - sizeof(hArgValue), &CLArgSampler); + sizeof(CLArgSampler), &CLArgSampler); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 68bfa0761d..f1c58cbb3c 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -266,7 +266,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( cl_mem Buffer = FuncPtr( CLContext, PropertiesIntel.data(), static_cast(flags), size, pProperties->pHost, cl_adapter::cast(&RetErr)); - *phBuffer = new ur_mem_handle_t_(Buffer, hContext); + auto URMem = std::make_unique(Buffer, hContext); + *phBuffer = URMem.release(); return mapCLErrorToUR(RetErr); } } @@ -276,7 +277,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( clCreateBuffer(hContext->get(), static_cast(flags), size, HostPtr, cl_adapter::cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); - *phBuffer = new ur_mem_handle_t_(Buffer, hContext); + auto URMem = std::make_unique(Buffer, hContext); + *phBuffer = URMem.release(); return UR_RESULT_SUCCESS; } @@ -296,7 +298,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( clCreateImage(hContext->get(), MapFlags, &ImageFormat, &ImageDesc, pHost, cl_adapter::cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); - *phMem = new ur_mem_handle_t_(Mem, hContext); + auto URMem = std::make_unique(Mem, hContext); + *phMem = URMem.release(); return UR_RESULT_SUCCESS; } @@ -321,10 +324,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( BufferRegion.origin = pRegion->origin; BufferRegion.size = pRegion->size; - *phMem = 
reinterpret_cast(clCreateSubBuffer( + cl_mem Buffer = clCreateSubBuffer( hBuffer->get(), static_cast(flags), BufferCreateType, - &BufferRegion, cl_adapter::cast(&RetErr))); - + &BufferRegion, cl_adapter::cast(&RetErr)); + auto URMem = std::make_unique(Buffer, hBuffer->Context); + *phMem = URMem.release(); if (RetErr == CL_INVALID_VALUE) { size_t BufferSize = 0; CL_RETURN_ON_FAILURE(clGetMemObjectInfo( @@ -344,7 +348,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { cl_mem NativeHandle = reinterpret_cast(hNativeMem); - *phMem = new ur_mem_handle_t_(NativeHandle, hContext); + auto URMem = std::make_unique(NativeHandle, hContext); + *phMem = URMem.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return urMemRetain(*phMem); } @@ -357,7 +362,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( [[maybe_unused]] const ur_image_desc_t *pImageDesc, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { cl_mem NativeHandle = reinterpret_cast(hNativeMem); - *phMem = new ur_mem_handle_t_(NativeHandle, hContext); + auto URMem = std::make_unique(NativeHandle, hContext); + *phMem = URMem.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return urMemRetain(*phMem); } diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 2526c14a51..89bb16d7e9 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -87,7 +87,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - static std::vector URPlatforms; + static std::vector> URPlatforms; static std::once_flag InitFlag; static uint32_t NumPlatforms = 0; cl_int Result = CL_SUCCESS; @@ -107,7 +107,8 @@ 
urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } URPlatforms.resize(NumPlatforms); for (uint32_t i = 0; i < NumPlatforms; i++) { - URPlatforms[i] = new ur_platform_handle_t_(CLPlatforms[i]); + URPlatforms[i] = + std::make_unique(CLPlatforms[i]); } return Result; }, @@ -125,7 +126,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } if (NumEntries && phPlatforms) { for (uint32_t i = 0; i < NumEntries; i++) { - phPlatforms[i] = URPlatforms[i]; + phPlatforms[i] = URPlatforms[i].get(); } } return mapCLErrorToUR(Result); @@ -142,7 +143,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( ur_platform_handle_t *phPlatform) { cl_platform_id NativeHandle = reinterpret_cast(hNativePlatform); - *phPlatform = new ur_platform_handle_t_(NativeHandle); + auto URPlatform = std::make_unique(NativeHandle); + *phPlatform = URPlatform.release(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 667b998970..0957f4562d 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -17,172 +17,16 @@ namespace cl_adapter { ur_result_t getPlatformVersion(cl_platform_id Plat, oclv::OpenCLVersion &Version); - -// Older versions of GCC don't like "const" here -#if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2)) -#define CONSTFIX constexpr -#else -#define CONSTFIX const -#endif - -// Names of USM functions that are queried from OpenCL -CONSTFIX char HostMemAllocName[] = "clHostMemAllocINTEL"; -CONSTFIX char DeviceMemAllocName[] = "clDeviceMemAllocINTEL"; -CONSTFIX char SharedMemAllocName[] = "clSharedMemAllocINTEL"; -CONSTFIX char MemBlockingFreeName[] = "clMemBlockingFreeINTEL"; -CONSTFIX char CreateBufferWithPropertiesName[] = - "clCreateBufferWithPropertiesINTEL"; -CONSTFIX char SetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; -CONSTFIX char EnqueueMemFillName[] = 
"clEnqueueMemFillINTEL"; -CONSTFIX char EnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; -CONSTFIX char GetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; -CONSTFIX char SetProgramSpecializationConstantName[] = - "clSetProgramSpecializationConstant"; -CONSTFIX char GetDeviceFunctionPointerName[] = - "clGetDeviceFunctionPointerINTEL"; -CONSTFIX char EnqueueWriteGlobalVariableName[] = - "clEnqueueWriteGlobalVariableINTEL"; -CONSTFIX char EnqueueReadGlobalVariableName[] = - "clEnqueueReadGlobalVariableINTEL"; -// Names of host pipe functions queried from OpenCL -CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; -CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; -// Names of command buffer functions queried from OpenCL -CONSTFIX char CreateCommandBufferName[] = "clCreateCommandBufferKHR"; -CONSTFIX char RetainCommandBufferName[] = "clRetainCommandBufferKHR"; -CONSTFIX char ReleaseCommandBufferName[] = "clReleaseCommandBufferKHR"; -CONSTFIX char FinalizeCommandBufferName[] = "clFinalizeCommandBufferKHR"; -CONSTFIX char CommandNRRangeKernelName[] = "clCommandNDRangeKernelKHR"; -CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR"; -CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; -CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; -CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; - -#undef CONSTFIX - -using clGetDeviceFunctionPointer_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_device_id device, cl_program program, - const char *FuncName, cl_ulong *ret_ptr); - -using clEnqueueWriteGlobalVariable_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, - size_t, size_t, const void *, cl_uint, const cl_event *, - cl_event *); - -using clEnqueueReadGlobalVariable_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool, - size_t, size_t, void *, cl_uint, const cl_event *, - cl_event *); - 
-using clSetProgramSpecializationConstant_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size, - const void *spec_value); - -using clEnqueueReadHostPipeINTEL_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, - const char *pipe_symbol, cl_bool blocking, void *ptr, - size_t size, cl_uint num_events_in_waitlist, - const cl_event *events_waitlist, cl_event *event); - -using clEnqueueWriteHostPipeINTEL_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, - const char *pipe_symbol, cl_bool blocking, - const void *ptr, size_t size, - cl_uint num_events_in_waitlist, - const cl_event *events_waitlist, cl_event *event); - -using clCreateCommandBufferKHR_fn = CL_API_ENTRY cl_command_buffer_khr( - CL_API_CALL *)(cl_uint num_queues, const cl_command_queue *queues, - const cl_command_buffer_properties_khr *properties, - cl_int *errcode_ret); - -using clRetainCommandBufferKHR_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); - -using clReleaseCommandBufferKHR_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); - -using clFinalizeCommandBufferKHR_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); - -using clCommandNDRangeKernelKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( - cl_command_buffer_khr command_buffer, cl_command_queue command_queue, - const cl_ndrange_kernel_command_properties_khr *properties, - cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, - const size_t *global_work_size, const size_t *local_work_size, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr *sync_point_wait_list, - cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); - -using clCommandCopyBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( - cl_command_buffer_khr command_buffer, cl_command_queue command_queue, - cl_mem src_buffer, cl_mem dst_buffer, size_t 
src_offset, size_t dst_offset, - size_t size, cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr *sync_point_wait_list, - cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); - -using clCommandCopyBufferRectKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( - cl_command_buffer_khr command_buffer, cl_command_queue command_queue, - cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, - const size_t *dst_origin, const size_t *region, size_t src_row_pitch, - size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, - cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr *sync_point_wait_list, - cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); - -using clCommandFillBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( - cl_command_buffer_khr command_buffer, cl_command_queue command_queue, - cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, - size_t size, cl_uint num_sync_points_in_wait_list, - const cl_sync_point_khr *sync_point_wait_list, - cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); - -using clEnqueueCommandBufferKHR_fn = CL_API_ENTRY -cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, - cl_command_buffer_khr command_buffer, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -struct ExtFuncPtrT { - clHostMemAllocINTEL_fn clHostMemAllocINTELCache; - clDeviceMemAllocINTEL_fn clDeviceMemAllocINTELCache; - clSharedMemAllocINTEL_fn clSharedMemAllocINTELCache; - clGetDeviceFunctionPointer_fn clGetDeviceFunctionPointerCache; - clCreateBufferWithPropertiesINTEL_fn clCreateBufferWithPropertiesINTELCache; - clMemBlockingFreeINTEL_fn clMemBlockingFreeINTELCache; - clSetKernelArgMemPointerINTEL_fn clSetKernelArgMemPointerINTELCache; - clEnqueueMemFillINTEL_fn clEnqueueMemFillINTELCache; - clEnqueueMemcpyINTEL_fn clEnqueueMemcpyINTELCache; - clGetMemAllocInfoINTEL_fn clGetMemAllocInfoINTELCache; - 
clEnqueueWriteGlobalVariable_fn clEnqueueWriteGlobalVariableCache; - clEnqueueReadGlobalVariable_fn clEnqueueReadGlobalVariableCache; - clEnqueueReadHostPipeINTEL_fn clEnqueueReadHostPipeINTELCache; - clEnqueueWriteHostPipeINTEL_fn clEnqueueWriteHostPipeINTELCache; - clSetProgramSpecializationConstant_fn clSetProgramSpecializationConstantCache; - clCreateCommandBufferKHR_fn clCreateCommandBufferKHRCache; - clRetainCommandBufferKHR_fn clRetainCommandBufferKHRCache; - clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHRCache; - clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHRCache; - clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHRCache; - clCommandCopyBufferKHR_fn clCommandCopyBufferKHRCache; - clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHRCache; - clCommandFillBufferKHR_fn clCommandFillBufferKHRCache; - clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHRCache; -}; } // namespace cl_adapter struct ur_platform_handle_t_ { using native_type = cl_platform_id; native_type Platform = nullptr; - std::unique_ptr ExtFuncPtr; - std::vector Devices; + std::vector> Devices; - ur_platform_handle_t_(native_type Plat) : Platform(Plat) { - ExtFuncPtr = std::make_unique(); - InitDevices(); - } + ur_platform_handle_t_(native_type Plat) : Platform(Plat) {} - ~ur_platform_handle_t_() { ExtFuncPtr.reset(); } + ~ur_platform_handle_t_() {} template ur_result_t getExtFunc(T CachedExtFunc, const char *FuncName, T *Fptr) { @@ -201,17 +45,20 @@ struct ur_platform_handle_t_ { native_type get() { return Platform; } ur_result_t InitDevices() { - cl_uint DeviceNum = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &DeviceNum)); - - std::vector CLDevices(DeviceNum); - CL_RETURN_ON_FAILURE(clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, DeviceNum, - CLDevices.data(), nullptr)); - - Devices = std::vector(DeviceNum); - for (size_t i = 0; i < DeviceNum; i++) { - Devices[i] = new ur_device_handle_t_(CLDevices[i], this, nullptr); + if 
(Devices.empty()) { + cl_uint DeviceNum = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &DeviceNum)); + + std::vector CLDevices(DeviceNum); + CL_RETURN_ON_FAILURE(clGetDeviceIDs( + Platform, CL_DEVICE_TYPE_ALL, DeviceNum, CLDevices.data(), nullptr)); + + Devices.resize(DeviceNum); + for (size_t i = 0; i < DeviceNum; i++) { + Devices[i] = + std::make_unique(CLDevices[i], this, nullptr); + } } return UR_RESULT_SUCCESS; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index bd1cf9fca6..1ab6fb35ee 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -72,8 +72,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( cl_program Program = clCreateProgramWithIL(hContext->get(), pIL, length, &Err); CL_RETURN_ON_FAILURE(Err); - - *phProgram = new ur_program_handle_t_(Program, hContext); + auto URProgram = std::make_unique(Program, hContext); + *phProgram = URProgram.release(); } else { /* If none of the devices conform with CL 2.1 or newer make sure they all @@ -100,7 +100,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( assert(FuncPtr != nullptr); cl_program Program = FuncPtr(hContext->get(), pIL, length, &Err); - *phProgram = new ur_program_handle_t_(Program, hContext); + auto URProgram = std::make_unique(Program, hContext); + *phProgram = URProgram.release(); CL_RETURN_ON_FAILURE(Err); } @@ -120,7 +121,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( cl_program Program = clCreateProgramWithBinary( hContext->get(), cl_adapter::cast(1u), Devices, Lengths, &pBinary, BinaryStatus, &CLResult); - *phProgram = new ur_program_handle_t_(Program, hContext); + + auto URProgram = std::make_unique(Program, hContext); + *phProgram = URProgram.release(); CL_RETURN_ON_FAILURE(BinaryStatus[0]); CL_RETURN_ON_FAILURE(CLResult); @@ -186,6 +189,10 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, cl_uint 
DeviceCount = hProgram->Context->DeviceCount; return ReturnValue(DeviceCount); } + case UR_PROGRAM_INFO_DEVICES: { + return ReturnValue(&hProgram->Context->Devices[0], + hProgram->Context->DeviceCount); + } default: { size_t CheckPropSize = 0; auto ClResult = clGetProgramInfo(hProgram->get(), CLPropName, propSize, @@ -230,7 +237,8 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, hContext->get(), 0, nullptr, pOptions, cl_adapter::cast(count), CLPrograms.data(), nullptr, nullptr, &CLResult); CL_RETURN_ON_FAILURE(CLResult); - *phProgram = new ur_program_handle_t_(Program, hContext); + auto URProgram = std::make_unique(Program, hContext); + *phProgram = URProgram.release(); return UR_RESULT_SUCCESS; } @@ -347,7 +355,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( const ur_program_native_properties_t *pProperties, ur_program_handle_t *phProgram) { cl_program NativeHandle = reinterpret_cast(hNativeProgram); - *phProgram = new ur_program_handle_t_(NativeHandle, hContext); + + auto URProgram = + std::make_unique(NativeHandle, hContext); + *phProgram = URProgram.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return urProgramRetain(*phProgram); } diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 41ff096acc..477bcb2015 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -95,7 +95,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( clCreateCommandQueue(hContext->get(), hDevice->get(), CLProperties & SupportByOpenCL, &RetErr); CL_RETURN_ON_FAILURE(RetErr); - *phQueue = new ur_queue_handle_t_(Queue, hContext, hDevice); + auto URQueue = + std::make_unique(Queue, hContext, hDevice); + *phQueue = URQueue.release(); return UR_RESULT_SUCCESS; } @@ -105,7 +107,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_command_queue Queue = clCreateCommandQueueWithProperties( hContext->get(), hDevice->get(), CreationFlagProperties, &RetErr); 
CL_RETURN_ON_FAILURE(RetErr); - *phQueue = new ur_queue_handle_t_(Queue, hContext, hDevice); + auto URQueue = std::make_unique(Queue, hContext, hDevice); + *phQueue = URQueue.release(); return UR_RESULT_SUCCESS; } @@ -167,7 +170,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( cl_command_queue NativeHandle = reinterpret_cast(hNativeQueue); - *phQueue = new ur_queue_handle_t_(NativeHandle, hContext, hDevice); + auto URQueue = + std::make_unique(NativeHandle, hContext, hDevice); + *phQueue = URQueue.release(); cl_int RetErr = clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 688f3b1354..532dcbeb8a 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -148,7 +148,8 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, cl_sampler Sampler = clCreateSampler( hContext->get(), static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, cl_adapter::cast(&ErrorCode)); - *phSampler = new ur_sampler_handle_t_(Sampler, hContext); + auto URSampler = std::make_unique(Sampler, hContext); + *phSampler = URSampler.release(); return mapCLErrorToUR(ErrorCode); } @@ -200,7 +201,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { cl_sampler NativeHandle = reinterpret_cast(hNativeSampler); - *phSampler = new ur_sampler_handle_t_(NativeHandle, hContext); + auto URSampler = + std::make_unique(NativeHandle, hContext); + *phSampler = URSampler.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return clRetainSampler(NativeHandle); } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 684004e126..241936a6ad 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -252,7 +252,9 @@ 
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( patternSize, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; } @@ -287,9 +289,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( } cl_event CopyEvent = nullptr; - CL_RETURN_ON_FAILURE(USMMemcpy( - hQueue->get(), false, ptr, HostBuffer, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), &CopyEvent)); + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + CL_RETURN_ON_FAILURE(USMMemcpy(hQueue->get(), false, ptr, HostBuffer, size, + numEventsInWaitList, CLWaitEvents.data(), + &CopyEvent)); struct DeleteCallbackInfo { DeleteCallbackInfo(clMemBlockingFreeINTEL_fn USMFree, cl_context CLContext, @@ -326,7 +332,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( CL_RETURN_ON_FAILURE(ClErr); } if (phEvent) { - *phEvent = new ur_event_handle_t_(CopyEvent, hQueue->Context, hQueue); + auto UREvent = std::make_unique( + CopyEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } else { CL_RETURN_ON_FAILURE(clReleaseEvent(CopyEvent)); } @@ -357,7 +365,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } } @@ -378,7 +388,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, 
hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; /* @@ -411,7 +423,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( CL_RETURN_ON_FAILURE( clEnqueueMarkerWithWaitList(hQueue->get(), 0, nullptr, &Event)); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } return UR_RESULT_SUCCESS; /* @@ -491,7 +505,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ClResult = clEnqueueBarrierWithWaitList(hQueue->get(), Events.size(), Events.data(), &Event); if (phEvent) { - *phEvent = new ur_event_handle_t_(Event, hQueue->Context, hQueue); + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); } } for (const auto &E : Events) { From e18c736f689df8637e3ec3a4a3aa4729733cfa93 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 18 Dec 2023 14:36:11 +0000 Subject: [PATCH 11/19] Change CreateWithNative impl for multiple handles --- source/adapters/opencl/context.cpp | 1 + source/adapters/opencl/context.hpp | 19 +++++++++++++++++++ source/adapters/opencl/device.cpp | 29 ++++++++++++++++++++++++----- source/adapters/opencl/device.hpp | 1 + source/adapters/opencl/memory.cpp | 2 ++ source/adapters/opencl/memory.hpp | 13 +++++++++++++ source/adapters/opencl/platform.cpp | 17 ++++++++++++++--- source/adapters/opencl/queue.cpp | 11 +++++------ source/adapters/opencl/queue.hpp | 22 ++++++++++++++++++++++ 9 files changed, 101 insertions(+), 14 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 8079c7dd5b..82f69e251b 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -144,6 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( cl_context NativeHandle = reinterpret_cast(hNativeContext); auto URContext = std::make_unique( NativeHandle, 
numDevices, phDevices); + UR_RETURN_ON_FAILURE(URContext->initWithNative()); *phContext = URContext.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return clRetainContext(NativeHandle); diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 46cbd2167f..e08501cb8a 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -33,6 +33,25 @@ struct ur_context_handle_t_ { Devices.emplace_back(phDevices[i]); } } + ur_result_t initWithNative() { + if (!DeviceCount) { + CL_RETURN_ON_FAILURE(clGetContextInfo(Context, CL_CONTEXT_NUM_DEVICES, + sizeof(DeviceCount), &DeviceCount, + nullptr)); + std::vector CLDevices(DeviceCount); + CL_RETURN_ON_FAILURE(clGetContextInfo(Context, CL_CONTEXT_DEVICES, + sizeof(CLDevices), CLDevices.data(), + nullptr)); + Devices.resize(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + ur_native_handle_t NativeDevice = + reinterpret_cast(CLDevices[i]); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( + NativeDevice, nullptr, nullptr, &Devices[i])); + } + } + return UR_RESULT_SUCCESS; + } ~ur_context_handle_t_() {} diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index d5336d8530..021b271211 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1054,13 +1054,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t hNativeDevice, ur_platform_handle_t hPlatform, + ur_native_handle_t hNativeDevice, ur_platform_handle_t, const ur_device_native_properties_t *, ur_device_handle_t *phDevice) { cl_device_id NativeHandle = reinterpret_cast(hNativeDevice); - auto URDevice = - std::make_unique(NativeHandle, hPlatform, nullptr); - *phDevice = URDevice.release(); - return UR_RESULT_SUCCESS; + + uint32_t NumPlatforms = 0; + UR_RETURN_ON_FAILURE(urPlatformGet(nullptr, 0, 0, nullptr, 
&NumPlatforms)); + std::vector Platforms(NumPlatforms); + UR_RETURN_ON_FAILURE( + urPlatformGet(nullptr, 0, NumPlatforms, Platforms.data(), nullptr)); + + for (uint32_t i = 0; i < NumPlatforms; i++) { + uint32_t NumDevices = 0; + UR_RETURN_ON_FAILURE( + urDeviceGet(Platforms[i], UR_DEVICE_TYPE_ALL, 0, nullptr, &NumDevices)); + std::vector Devices(NumDevices); + UR_RETURN_ON_FAILURE(urDeviceGet(Platforms[i], UR_DEVICE_TYPE_ALL, + NumDevices, Devices.data(), nullptr)); + + for (auto &Device : Devices) { + if (Device->get() == NativeHandle) { + *phDevice = Device; + return UR_RESULT_SUCCESS; + } + } + } + return UR_RESULT_ERROR_INVALID_DEVICE; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index dc29a1692e..2c4c97193b 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -10,6 +10,7 @@ #pragma once #include "common.hpp" +#include "platform.hpp" namespace cl_adapter { ur_result_t getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version); diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index f1c58cbb3c..b5dacbe5eb 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -349,6 +349,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { cl_mem NativeHandle = reinterpret_cast(hNativeMem); auto URMem = std::make_unique(NativeHandle, hContext); + UR_RETURN_ON_FAILURE(URMem->initWithNative()); *phMem = URMem.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return urMemRetain(*phMem); @@ -363,6 +364,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { cl_mem NativeHandle = reinterpret_cast(hNativeMem); auto URMem = std::make_unique(NativeHandle, hContext); + 
UR_RETURN_ON_FAILURE(URMem->initWithNative()); *phMem = URMem.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return urMemRetain(*phMem); diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp index 48d5cfd895..bd3a4c994d 100644 --- a/source/adapters/opencl/memory.hpp +++ b/source/adapters/opencl/memory.hpp @@ -23,5 +23,18 @@ struct ur_mem_handle_t_ { ~ur_mem_handle_t_() {} + ur_result_t initWithNative() { + if (!Context) { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo( + Memory, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &Context)); + } + return UR_RESULT_SUCCESS; + } + native_type get() { return Memory; } }; diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 89bb16d7e9..ddeb276870 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -143,9 +143,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( ur_platform_handle_t *phPlatform) { cl_platform_id NativeHandle = reinterpret_cast(hNativePlatform); - auto URPlatform = std::make_unique(NativeHandle); - *phPlatform = URPlatform.release(); - return UR_RESULT_SUCCESS; + + uint32_t NumPlatforms = 0; + UR_RETURN_ON_FAILURE(urPlatformGet(nullptr, 0, 0, nullptr, &NumPlatforms)); + std::vector Platforms(NumPlatforms); + UR_RETURN_ON_FAILURE( + urPlatformGet(nullptr, 0, NumPlatforms, Platforms.data(), nullptr)); + + for (uint32_t i = 0; i < NumPlatforms; i++) { + if (Platforms[i]->get() == NativeHandle) { + *phPlatform = Platforms[i]; + return UR_RESULT_SUCCESS; + } + } + return UR_RESULT_ERROR_INVALID_PLATFORM; } // Returns plugin specific backend option. 
diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 477bcb2015..4e387d7dbe 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -162,9 +162,8 @@ urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, } UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( - ur_native_handle_t hNativeQueue, - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, + ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, + ur_device_handle_t hDevice, [[maybe_unused]] const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { @@ -172,11 +171,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( reinterpret_cast(hNativeQueue); auto URQueue = std::make_unique(NativeHandle, hContext, hDevice); + UR_RETURN_ON_FAILURE(URQueue->initWithNative()); *phQueue = URQueue.release(); - cl_int RetErr = - clRetainCommandQueue(cl_adapter::cast(hNativeQueue)); - CL_RETURN_ON_FAILURE(RetErr); + CL_RETURN_ON_FAILURE(clRetainCommandQueue(NativeHandle)); + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp index cdeb14af37..4918fa0f31 100644 --- a/source/adapters/opencl/queue.hpp +++ b/source/adapters/opencl/queue.hpp @@ -23,6 +23,28 @@ struct ur_queue_handle_t_ { ur_device_handle_t Dev) : Queue(Queue), Context(Ctx), Device(Dev) {} + ur_result_t initWithNative() { + if (!Context) { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + Queue, CL_QUEUE_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &Context)); + } + if (!Device) { + cl_device_id CLDevice; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + Queue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); + ur_native_handle_t 
NativeDevice = + reinterpret_cast(CLDevice); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle(NativeDevice, nullptr, + nullptr, &Device)); + } + return UR_RESULT_SUCCESS; + } + ~ur_queue_handle_t_() {} native_type get() { return Queue; } From 09d2242c03aebbdd57132cb602701c16974a2a17 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Tue, 19 Dec 2023 12:42:16 +0000 Subject: [PATCH 12/19] Add kernel handle --- source/adapters/opencl/command_buffer.cpp | 8 +- source/adapters/opencl/enqueue.cpp | 9 +- source/adapters/opencl/kernel.cpp | 107 ++++++++++++---------- source/adapters/opencl/kernel.hpp | 54 +++++++++++ source/adapters/opencl/memory.cpp | 2 +- source/adapters/opencl/platform.cpp | 8 +- source/adapters/opencl/program.cpp | 1 + source/adapters/opencl/program.hpp | 13 +++ source/adapters/opencl/usm.cpp | 5 +- 9 files changed, 143 insertions(+), 64 deletions(-) create mode 100644 source/adapters/opencl/kernel.hpp diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index a6fd0f4391..df61e8b5b0 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -12,6 +12,7 @@ #include "common.hpp" #include "context.hpp" #include "event.hpp" +#include "kernel.hpp" #include "memory.hpp" #include "queue.hpp" @@ -121,10 +122,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hKernel->get(), + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/enqueue.cpp 
b/source/adapters/opencl/enqueue.cpp index 13e952d1a4..ce2481f1a6 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -11,6 +11,7 @@ #include "common.hpp" #include "context.hpp" #include "event.hpp" +#include "kernel.hpp" #include "memory.hpp" #include "program.hpp" #include "queue.hpp" @@ -40,10 +41,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (uint32_t i = 0; i < numEventsInWaitList; i++) { CLWaitEvents[i] = phEventWaitList[i]->get(); } - CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( - hQueue->get(), cl_adapter::cast(hKernel), workDim, - pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, - CLWaitEvents.data(), &Event)); + CL_RETURN_ON_FAILURE( + clEnqueueNDRangeKernel(hQueue->get(), hKernel->get(), workDim, + pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { auto UREvent = std::make_unique(Event, hQueue->Context, hQueue); diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 440c981030..8993ee693f 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -7,6 +7,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "kernel.hpp" #include "common.hpp" #include "device.hpp" #include "memory.hpp" @@ -21,9 +22,11 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { cl_int CLResult; - *phKernel = cl_adapter::cast( - clCreateKernel(hProgram->get(), pKernelName, &CLResult)); + cl_kernel Kernel = clCreateKernel(hProgram->get(), pKernelName, &CLResult); CL_RETURN_ON_FAILURE(CLResult); + auto URKernel = std::make_unique(Kernel, hProgram, + hProgram->Context); + *phKernel = URKernel.release(); return UR_RESULT_SUCCESS; } @@ -31,9 +34,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( 
ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_value_properties_t *, const void *pArgValue) { - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - argSize, pArgValue)); + CL_RETURN_ON_FAILURE(clSetKernelArg( + hKernel->get(), cl_adapter::cast(argIndex), argSize, pArgValue)); return UR_RESULT_SUCCESS; } @@ -42,9 +44,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_local_properties_t *) { - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - argSize, nullptr)); + CL_RETURN_ON_FAILURE(clSetKernelArg( + hKernel->get(), cl_adapter::cast(argIndex), argSize, nullptr)); return UR_RESULT_SUCCESS; } @@ -76,6 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); // We need this little bit of ugliness because the UR NUM_ARGS property is // size_t whereas the CL one is cl_uint. 
We should consider changing that see // #1038 @@ -83,7 +85,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, if (pPropSizeRet) *pPropSizeRet = sizeof(size_t); cl_uint NumArgs = 0; - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), sizeof(NumArgs), &NumArgs, nullptr)); if (pPropValue) { @@ -91,11 +93,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, return UR_RESULT_ERROR_INVALID_SIZE; *static_cast(pPropValue) = static_cast(NumArgs); } + } else if (propName == UR_KERNEL_INFO_PROGRAM) { + return ReturnValue(hKernel->Program); + } else if (propName == UR_KERNEL_INFO_CONTEXT) { + return ReturnValue(hKernel->Context); } else { size_t CheckPropSize = 0; - cl_int ClResult = clGetKernelInfo(cl_adapter::cast(hKernel), - mapURKernelInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); + cl_int ClResult = + clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -147,8 +153,8 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } } CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - cl_adapter::cast(hKernel), hDevice->get(), - mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); + hKernel->get(), hDevice->get(), mapURKernelGroupInfoToCL(propName), + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -201,9 +207,8 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } cl_int Ret = clGetKernelSubGroupInfo( - cl_adapter::cast(hKernel), hDevice->get(), - mapURKernelSubGroupInfoToCL(propName), InputValueSize, InputValue.get(), - sizeof(size_t), &RetVal, pPropSizeRet); + hKernel->get(), hDevice->get(), mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), 
sizeof(size_t), &RetVal, pPropSizeRet); if (Ret == CL_INVALID_OPERATION) { // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does @@ -252,13 +257,13 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clRetainKernel(cl_adapter::cast(hKernel))); + CL_RETURN_ON_FAILURE(clRetainKernel(hKernel->get())); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clReleaseKernel(cl_adapter::cast(hKernel))); + CL_RETURN_ON_FAILURE(clReleaseKernel(hKernel->get())); return UR_RESULT_SUCCESS; } @@ -276,19 +281,18 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { /* We test that each alloc type is supported before we actually try to set * KernelExecInfo. */ - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - CL_KERNEL_CONTEXT, sizeof(cl_context), - &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), CL_KERNEL_CONTEXT, + sizeof(cl_context), &CLContext, + nullptr)); UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &HFunc)); if (HFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( @@ -296,10 +300,9 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_ext::DeviceMemAllocName, &DFunc)); if (DFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + 
CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( @@ -307,10 +310,9 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_ext::SharedMemAllocName, &SFunc)); if (SFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } return UR_RESULT_SUCCESS; } @@ -332,9 +334,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_USM_PTRS: { - CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, propSize, pPropValue)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo(hKernel->get(), + CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, + propSize, pPropValue)); return UR_RESULT_SUCCESS; } default: { @@ -348,9 +350,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - CL_KERNEL_CONTEXT, sizeof(cl_context), - &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), CL_KERNEL_CONTEXT, + sizeof(cl_context), &CLContext, + nullptr)); clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( @@ -364,9 +366,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( * deref the arg to get the pointer value */ auto PtrToPtr = reinterpret_cast(pArgValue); auto DerefPtr = reinterpret_cast(*PtrToPtr); - CL_RETURN_ON_FAILURE(FuncPtr(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - DerefPtr)); + CL_RETURN_ON_FAILURE( + FuncPtr(hKernel->get(), cl_adapter::cast(argIndex), 
DerefPtr)); } return UR_RESULT_SUCCESS; @@ -374,15 +375,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { - *phNativeKernel = reinterpret_cast(hKernel); + *phNativeKernel = reinterpret_cast(hKernel->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, + ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, + ur_program_handle_t hProgram, const ur_kernel_native_properties_t *pProperties, ur_kernel_handle_t *phKernel) { - *phKernel = reinterpret_cast(hNativeKernel); + cl_kernel NativeHandle = reinterpret_cast(hNativeKernel); + auto URKernel = + std::make_unique(NativeHandle, hProgram, hContext); + UR_RETURN_ON_FAILURE(URKernel->initWithNative()); + *phKernel = URKernel.release(); + if (!pProperties || !pProperties->isNativeHandleOwned) { return urKernelRetain(*phKernel); } @@ -394,7 +401,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { cl_mem CLArgValue = hArgValue ? 
hArgValue->get() : nullptr; - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->get(), cl_adapter::cast(argIndex), sizeof(CLArgValue), &CLArgValue)); return UR_RESULT_SUCCESS; @@ -405,9 +412,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { cl_sampler CLArgSampler = hArgValue->get(); - cl_int RetErr = clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - sizeof(CLArgSampler), &CLArgSampler); + cl_int RetErr = + clSetKernelArg(hKernel->get(), cl_adapter::cast(argIndex), + sizeof(CLArgSampler), &CLArgSampler); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp new file mode 100644 index 0000000000..3323fb68c7 --- /dev/null +++ b/source/adapters/opencl/kernel.hpp @@ -0,0 +1,54 @@ +//===--------- kernel.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" + +#include + +struct ur_kernel_handle_t_ { + using native_type = cl_kernel; + native_type Kernel; + ur_program_handle_t Program; + ur_context_handle_t Context; + + ur_kernel_handle_t_(native_type Kernel, ur_program_handle_t Program, + ur_context_handle_t Context) + : Kernel(Kernel), Program(Program), Context(Context) {} + + ~ur_kernel_handle_t_() {} + + ur_result_t initWithNative() { + if (!Program) { + cl_program CLProgram; + CL_RETURN_ON_FAILURE(clGetKernelInfo( + Kernel, CL_KERNEL_PROGRAM, sizeof(CLProgram), &CLProgram, nullptr)); + ur_native_handle_t NativeProgram = + reinterpret_cast(CLProgram); + UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( + NativeProgram, nullptr, nullptr, &Program)); + } + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo( + Kernel, CL_KERNEL_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + if (!Context) { + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &Context)); + } else if (Context->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + return UR_RESULT_SUCCESS; + } + + native_type get() { return Kernel; } +}; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index b5dacbe5eb..fcc4754374 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -226,8 +226,8 @@ cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { - cl_int RetErr = CL_INVALID_OPERATION; + UR_RETURN_ON_FAILURE(urContextRetain(hContext)); 
if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index ddeb276870..8fa7056bcb 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -87,7 +87,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - static std::vector> URPlatforms; + static std::vector URPlatforms; static std::once_flag InitFlag; static uint32_t NumPlatforms = 0; cl_int Result = CL_SUCCESS; @@ -105,10 +105,10 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, if (Result != CL_SUCCESS) { return Result; } - URPlatforms.resize(NumPlatforms); for (uint32_t i = 0; i < NumPlatforms; i++) { - URPlatforms[i] = + auto URPlatform = std::make_unique(CLPlatforms[i]); + URPlatforms.emplace_back(URPlatform.release()); } return Result; }, @@ -126,7 +126,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } if (NumEntries && phPlatforms) { for (uint32_t i = 0; i < NumEntries; i++) { - phPlatforms[i] = URPlatforms[i].get(); + phPlatforms[i] = URPlatforms[i]; } } return mapCLErrorToUR(Result); diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 1ab6fb35ee..0d47d62457 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -358,6 +358,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( auto URProgram = std::make_unique(NativeHandle, hContext); + UR_RETURN_ON_FAILURE(URProgram->initWithNative()); *phProgram = URProgram.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return urProgramRetain(*phProgram); diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index 84f486b7e8..5c40cdc0b2 100644 --- a/source/adapters/opencl/program.hpp 
+++ b/source/adapters/opencl/program.hpp @@ -23,5 +23,18 @@ struct ur_program_handle_t_ { ~ur_program_handle_t_() {} + ur_result_t initWithNative() { + if (!Context) { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetProgramInfo( + Program, CL_PROGRAM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &Context)); + } + return UR_RESULT_SUCCESS; + } + native_type get() { return Program; } }; diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 241936a6ad..ea541b0cb9 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -559,7 +559,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( default: return UR_RESULT_ERROR_INVALID_VALUE; } - + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + if (propName == UR_USM_ALLOC_INFO_DEVICE) { + return ReturnValue(Context->Devices[0]); + } size_t CheckPropSize = 0; cl_int ClErr = GetMemAllocInfo(Context->get(), pMem, PropNameCL, propSize, pPropValue, &CheckPropSize); From 31fc8f637f40127e35a4831ee3c32100ec603037 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 8 Jan 2024 14:16:12 +0000 Subject: [PATCH 13/19] Refactor makeWithNative --- source/adapters/opencl/context.cpp | 4 ++-- source/adapters/opencl/context.hpp | 22 +++++++++++++++------- source/adapters/opencl/kernel.cpp | 7 +++---- source/adapters/opencl/kernel.hpp | 22 +++++++++++++++------- source/adapters/opencl/memory.cpp | 12 +++++------- source/adapters/opencl/memory.hpp | 12 ++++++++---- source/adapters/opencl/program.cpp | 6 ++---- source/adapters/opencl/program.hpp | 14 ++++++++++---- source/adapters/opencl/queue.cpp | 7 +++---- source/adapters/opencl/queue.hpp | 21 ++++++++++++++------- 10 files changed, 77 insertions(+), 50 deletions(-) diff --git a/source/adapters/opencl/context.cpp 
b/source/adapters/opencl/context.cpp index 82f69e251b..9a0786ca57 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -144,8 +144,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( cl_context NativeHandle = reinterpret_cast(hNativeContext); auto URContext = std::make_unique( NativeHandle, numDevices, phDevices); - UR_RETURN_ON_FAILURE(URContext->initWithNative()); - *phContext = URContext.release(); + UR_RETURN_ON_FAILURE(ur_context_handle_t_::makeWithNative( + NativeHandle, numDevices, phDevices, *phContext)); if (!pProperties || !pProperties->isNativeHandleOwned) { return clRetainContext(NativeHandle); } diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index e08501cb8a..5f6c186db3 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -33,23 +33,31 @@ struct ur_context_handle_t_ { Devices.emplace_back(phDevices[i]); } } - ur_result_t initWithNative() { + + static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount, + const ur_device_handle_t *phDevices, + ur_context_handle_t &Context) { + auto URContext = + std::make_unique(Ctx, DevCount, phDevices); + native_type &NativeContext = URContext->Context; + uint32_t &DeviceCount = URContext->DeviceCount; if (!DeviceCount) { - CL_RETURN_ON_FAILURE(clGetContextInfo(Context, CL_CONTEXT_NUM_DEVICES, - sizeof(DeviceCount), &DeviceCount, - nullptr)); + CL_RETURN_ON_FAILURE( + clGetContextInfo(NativeContext, CL_CONTEXT_NUM_DEVICES, + sizeof(DeviceCount), &DeviceCount, nullptr)); std::vector CLDevices(DeviceCount); - CL_RETURN_ON_FAILURE(clGetContextInfo(Context, CL_CONTEXT_DEVICES, + CL_RETURN_ON_FAILURE(clGetContextInfo(NativeContext, CL_CONTEXT_DEVICES, sizeof(CLDevices), CLDevices.data(), nullptr)); - Devices.resize(DeviceCount); + URContext->Devices.resize(DeviceCount); for (uint32_t i = 0; i < DeviceCount; i++) { ur_native_handle_t NativeDevice = 
reinterpret_cast(CLDevices[i]); UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( - NativeDevice, nullptr, nullptr, &Devices[i])); + NativeDevice, nullptr, nullptr, &(URContext->Devices[i]))); } } + Context = URContext.release(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 8993ee693f..51fa78dc2a 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -385,10 +385,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( const ur_kernel_native_properties_t *pProperties, ur_kernel_handle_t *phKernel) { cl_kernel NativeHandle = reinterpret_cast(hNativeKernel); - auto URKernel = - std::make_unique(NativeHandle, hProgram, hContext); - UR_RETURN_ON_FAILURE(URKernel->initWithNative()); - *phKernel = URKernel.release(); + + UR_RETURN_ON_FAILURE(ur_kernel_handle_t_::makeWithNative( + NativeHandle, hProgram, hContext, *phKernel)); if (!pProperties || !pProperties->isNativeHandleOwned) { return urKernelRetain(*phKernel); diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp index 3323fb68c7..37e60e74b7 100644 --- a/source/adapters/opencl/kernel.hpp +++ b/source/adapters/opencl/kernel.hpp @@ -26,27 +26,35 @@ struct ur_kernel_handle_t_ { ~ur_kernel_handle_t_() {} - ur_result_t initWithNative() { + static ur_result_t makeWithNative(native_type NativeKernel, + ur_program_handle_t Program, + ur_context_handle_t Context, + ur_kernel_handle_t &Kernel) { + auto URKernel = + std::make_unique(NativeKernel, Program, Context); if (!Program) { cl_program CLProgram; - CL_RETURN_ON_FAILURE(clGetKernelInfo( - Kernel, CL_KERNEL_PROGRAM, sizeof(CLProgram), &CLProgram, nullptr)); + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, + sizeof(CLProgram), &CLProgram, + nullptr)); ur_native_handle_t NativeProgram = reinterpret_cast(CLProgram); UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( - NativeProgram, nullptr, 
nullptr, &Program)); + NativeProgram, nullptr, nullptr, &(URKernel->Program))); } cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetKernelInfo( - Kernel, CL_KERNEL_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); if (!Context) { ur_native_handle_t NativeContext = reinterpret_cast(CLContext); UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &Context)); + NativeContext, 0, nullptr, nullptr, &(URKernel->Context))); } else if (Context->get() != CLContext) { return UR_RESULT_ERROR_INVALID_CONTEXT; } + Kernel = URKernel.release(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index fcc4754374..3af3e32450 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -227,7 +227,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { cl_int RetErr = CL_INVALID_OPERATION; - UR_RETURN_ON_FAILURE(urContextRetain(hContext)); + // UR_RETURN_ON_FAILURE(urContextRetain(hContext)); if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported @@ -348,9 +348,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { cl_mem NativeHandle = reinterpret_cast(hNativeMem); - auto URMem = std::make_unique(NativeHandle, hContext); - UR_RETURN_ON_FAILURE(URMem->initWithNative()); - *phMem = URMem.release(); + UR_RETURN_ON_FAILURE( + ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); if (!pProperties || !pProperties->isNativeHandleOwned) { return urMemRetain(*phMem); } @@ -363,9 +362,8 @@ 
UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( [[maybe_unused]] const ur_image_desc_t *pImageDesc, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { cl_mem NativeHandle = reinterpret_cast(hNativeMem); - auto URMem = std::make_unique(NativeHandle, hContext); - UR_RETURN_ON_FAILURE(URMem->initWithNative()); - *phMem = URMem.release(); + UR_RETURN_ON_FAILURE( + ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); if (!pProperties || !pProperties->isNativeHandleOwned) { return urMemRetain(*phMem); } diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp index bd3a4c994d..ff4cddec3e 100644 --- a/source/adapters/opencl/memory.hpp +++ b/source/adapters/opencl/memory.hpp @@ -23,16 +23,20 @@ struct ur_mem_handle_t_ { ~ur_mem_handle_t_() {} - ur_result_t initWithNative() { - if (!Context) { + static ur_result_t makeWithNative(native_type NativeMem, + ur_context_handle_t Ctx, + ur_mem_handle_t &Mem) { + auto URMem = std::make_unique(NativeMem, Ctx); + if (!Ctx) { cl_context CLContext; CL_RETURN_ON_FAILURE(clGetMemObjectInfo( - Memory, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); ur_native_handle_t NativeContext = reinterpret_cast(CLContext); UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &Context)); + NativeContext, 0, nullptr, nullptr, &(URMem->Context))); } + Mem = URMem.release(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 0d47d62457..33561a02bc 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -356,10 +356,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( ur_program_handle_t *phProgram) { cl_program NativeHandle = reinterpret_cast(hNativeProgram); - auto URProgram = - std::make_unique(NativeHandle, hContext); - 
UR_RETURN_ON_FAILURE(URProgram->initWithNative()); - *phProgram = URProgram.release(); + UR_RETURN_ON_FAILURE( + ur_program_handle_t_::makeWithNative(NativeHandle, hContext, *phProgram)); if (!pProperties || !pProperties->isNativeHandleOwned) { return urProgramRetain(*phProgram); } diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index 5c40cdc0b2..63a0f5bac0 100644 --- a/source/adapters/opencl/program.hpp +++ b/source/adapters/opencl/program.hpp @@ -23,16 +23,22 @@ struct ur_program_handle_t_ { ~ur_program_handle_t_() {} - ur_result_t initWithNative() { + static ur_result_t makeWithNative(native_type NativeProg, + ur_context_handle_t Context, + ur_program_handle_t &Program) { + auto URProgram = + std::make_unique(NativeProg, Context); if (!Context) { cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetProgramInfo( - Program, CL_PROGRAM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); ur_native_handle_t NativeContext = reinterpret_cast(CLContext); UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &Context)); + NativeContext, 0, nullptr, nullptr, &(URProgram->Context))); } + Program = URProgram.release(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 4e387d7dbe..c21006fe90 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -169,10 +169,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( cl_command_queue NativeHandle = reinterpret_cast(hNativeQueue); - auto URQueue = - std::make_unique(NativeHandle, hContext, hDevice); - UR_RETURN_ON_FAILURE(URQueue->initWithNative()); - *phQueue = URQueue.release(); + + UR_RETURN_ON_FAILURE(ur_queue_handle_t_::makeWithNative( + NativeHandle, hContext, hDevice, *phQueue)); 
CL_RETURN_ON_FAILURE(clRetainCommandQueue(NativeHandle)); diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp index 4918fa0f31..80d620d1e4 100644 --- a/source/adapters/opencl/queue.hpp +++ b/source/adapters/opencl/queue.hpp @@ -23,25 +23,32 @@ struct ur_queue_handle_t_ { ur_device_handle_t Dev) : Queue(Queue), Context(Ctx), Device(Dev) {} - ur_result_t initWithNative() { + static ur_result_t makeWithNative(native_type NativeQueue, + ur_context_handle_t Context, + ur_device_handle_t Device, + ur_queue_handle_t &Queue) { + auto URQueue = + std::make_unique(NativeQueue, Context, Device); if (!Context) { cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - Queue, CL_QUEUE_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(NativeQueue, CL_QUEUE_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); ur_native_handle_t NativeContext = reinterpret_cast(CLContext); UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &Context)); + NativeContext, 0, nullptr, nullptr, &(URQueue->Context))); } if (!Device) { cl_device_id CLDevice; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - Queue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); + NativeQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); ur_native_handle_t NativeDevice = reinterpret_cast(CLDevice); - UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle(NativeDevice, nullptr, - nullptr, &Device)); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( + NativeDevice, nullptr, nullptr, &(URQueue->Device))); } + Queue = URQueue.release(); return UR_RESULT_SUCCESS; } From 6b85ca6f589f1db9d93e6316d02087012f3a1365 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Mon, 15 Jan 2024 15:36:13 +0000 Subject: [PATCH 14/19] Move platform and device version query to the handle --- source/adapters/opencl/context.cpp | 18 ------- source/adapters/opencl/context.hpp | 6 --- 
source/adapters/opencl/device.cpp | 81 +++++++---------------------- source/adapters/opencl/device.hpp | 46 +++++++++++++--- source/adapters/opencl/memory.cpp | 2 +- source/adapters/opencl/platform.cpp | 19 ------- source/adapters/opencl/platform.hpp | 23 ++++++-- source/adapters/opencl/program.cpp | 33 ++++++------ source/adapters/opencl/queue.cpp | 6 +-- 9 files changed, 93 insertions(+), 141 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 9a0786ca57..b68b8cedcd 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -14,24 +14,6 @@ #include #include -ur_result_t cl_adapter::getDevicesFromContext( - ur_context_handle_t hContext, - std::unique_ptr> &DevicesInCtx) { - - cl_uint DeviceCount = hContext->DeviceCount; - - if (DeviceCount < 1) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - DevicesInCtx = std::make_unique>(DeviceCount); - for (size_t i = 0; i < DeviceCount; i++) { - (*DevicesInCtx)[i] = hContext->Devices[i]->get(); - } - - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, const ur_device_handle_t *phDevices, const ur_context_properties_t *, ur_context_handle_t *phContext) { diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 5f6c186db3..ab1d38f518 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -14,12 +14,6 @@ #include -namespace cl_adapter { -ur_result_t -getDevicesFromContext(ur_context_handle_t hContext, - std::unique_ptr> &DevicesInCtx); -} - struct ur_context_handle_t_ { using native_type = cl_context; native_type Context; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 021b271211..1a2d3d5991 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -12,46 +12,6 @@ #include -ur_result_t cl_adapter::getDeviceVersion(cl_device_id Dev, - 
oclv::OpenCLVersion &Version) { - - size_t DevVerSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); - - std::string DevVer(DevVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_VERSION, DevVerSize, - DevVer.data(), nullptr)); - - Version = oclv::OpenCLVersion(DevVer); - if (!Version.isValid()) { - return UR_RESULT_ERROR_INVALID_DEVICE; - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t cl_adapter::checkDeviceExtensions( - cl_device_id Dev, const std::vector &Exts, bool &Supported) { - size_t ExtSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); - - std::string ExtStr(ExtSize, '\0'); - - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, - ExtStr.data(), nullptr)); - - Supported = true; - for (const std::string &Ext : Exts) { - if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { - break; - } - } - - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, ur_device_type_t DeviceType, [[maybe_unused]] uint32_t NumEntries, ur_device_handle_t *phDevices, @@ -351,8 +311,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_DEVICE_ID: { bool Supported = false; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), {"cl_khr_pci_bus_info"}, Supported)); + CL_RETURN_ON_FAILURE( + hDevice->checkDeviceExtensions({"cl_khr_pci_bus_info"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; @@ -367,7 +327,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { oclv::OpenCLVersion Version; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), Version)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(Version)); const std::string Results = std::to_string(Version.getMajor()) + "." 
+ std::to_string(Version.getMinor()); @@ -470,7 +430,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* Corresponding OpenCL query is only available starting with OpenCL 2.1 * and we have to emulate it on older OpenCL runtimes. */ oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); if (DevVer >= oclv::V2_1) { cl_uint CLValue; @@ -498,9 +458,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_device_fp_config * UR type: ur_device_fp_capability_flags_t */ if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { - bool Supported; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), {"cl_khr_fp16"}, Supported)); + bool Supported = false; + CL_RETURN_ON_FAILURE( + hDevice->checkDeviceExtensions({"cl_khr_fp16"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -519,7 +479,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* This query is missing before OpenCL 3.0. Check version and handle * appropriately */ oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); /* Minimum required capability to be returned. 
For OpenCL 1.2, this is all * that is required */ @@ -576,7 +536,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { @@ -627,7 +587,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { @@ -674,7 +634,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(hDevice->get(), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { @@ -725,8 +685,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_ATOMIC_64: { bool Supported = false; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), + CL_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, Supported)); @@ -743,8 +702,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { bool Supported = false; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), {"cl_intel_mem_channel_property"}, Supported)); + CL_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( + {"cl_intel_mem_channel_property"}, Supported)); return ReturnValue(Supported); } @@ 
-769,8 +728,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { bool Supported = false; - CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), {"cl_intel_program_scope_host_pipe"}, Supported)); + CL_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( + {"cl_intel_program_scope_host_pipe"}, Supported)); return ReturnValue(Supported); } case UR_DEVICE_INFO_QUEUE_PROPERTIES: @@ -1086,18 +1045,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t *pHostTimestamp) { oclv::OpenCLVersion DevVer, PlatVer; - cl_platform_id Platform; cl_device_id DeviceId = hDevice->get(); // TODO: Cache OpenCL version for each device and platform - auto RetErr = clGetDeviceInfo(DeviceId, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &Platform, nullptr); - CL_RETURN_ON_FAILURE(RetErr); - RetErr = cl_adapter::getDeviceVersion(DeviceId, DevVer); + auto RetErr = hDevice->getDeviceVersion(DevVer); CL_RETURN_ON_FAILURE(RetErr); - RetErr = cl_adapter::getPlatformVersion(Platform, PlatVer); + RetErr = hDevice->Platform->getPlatformVersion(PlatVer); if (PlatVer < oclv::V2_1 || DevVer < oclv::V2_1) { return UR_RESULT_ERROR_INVALID_OPERATION; diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 2c4c97193b..f538ce538b 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -12,14 +12,6 @@ #include "common.hpp" #include "platform.hpp" -namespace cl_adapter { -ur_result_t getDeviceVersion(cl_device_id Dev, oclv::OpenCLVersion &Version); - -ur_result_t checkDeviceExtensions(cl_device_id Dev, - const std::vector &Exts, - bool &Supported); -} // namespace cl_adapter - struct ur_device_handle_t_ { using native_type = cl_device_id; native_type Device; @@ -41,4 +33,42 @@ struct ur_device_handle_t_ { ~ur_device_handle_t_() {} native_type get() { return 
Device; } + + ur_result_t getDeviceVersion(oclv::OpenCLVersion &Version) { + size_t DevVerSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Device, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); + + std::string DevVer(DevVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Device, CL_DEVICE_VERSION, DevVerSize, + DevVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(DevVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_DEVICE; + } + + return UR_RESULT_SUCCESS; + } + + ur_result_t checkDeviceExtensions(const std::vector &Exts, + bool &Supported) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Device, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Device, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + Supported = true; + for (const std::string &Ext : Exts) { + if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + break; + } + } + + return UR_RESULT_SUCCESS; + } }; \ No newline at end of file diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 3af3e32450..41d43ccc5a 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -227,7 +227,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { cl_int RetErr = CL_INVALID_OPERATION; - // UR_RETURN_ON_FAILURE(urContextRetain(hContext)); + UR_RETURN_ON_FAILURE(urContextRetain(hContext)); if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 8fa7056bcb..a6d2659199 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -10,25 +10,6 @@ #include "platform.hpp" -ur_result_t 
cl_adapter::getPlatformVersion(cl_platform_id Plat, - oclv::OpenCLVersion &Version) { - - size_t PlatVerSize = 0; - CL_RETURN_ON_FAILURE( - clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); - - std::string PlatVer(PlatVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetPlatformInfo(Plat, CL_PLATFORM_VERSION, PlatVerSize, - PlatVer.data(), nullptr)); - - Version = oclv::OpenCLVersion(PlatVer); - if (!Version.isValid()) { - return UR_RESULT_ERROR_INVALID_PLATFORM; - } - - return UR_RESULT_SUCCESS; -} - static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { switch (URPropName) { diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 0957f4562d..16fe833fab 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -14,11 +14,6 @@ #include -namespace cl_adapter { -ur_result_t getPlatformVersion(cl_platform_id Plat, - oclv::OpenCLVersion &Version); -} // namespace cl_adapter - struct ur_platform_handle_t_ { using native_type = cl_platform_id; native_type Platform = nullptr; @@ -63,4 +58,22 @@ struct ur_platform_handle_t_ { return UR_RESULT_SUCCESS; } + + ur_result_t getPlatformVersion(oclv::OpenCLVersion &Version) { + + size_t PlatVerSize = 0; + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, 0, + nullptr, &PlatVerSize)); + + std::string PlatVer(PlatVerSize, '\0'); + CL_RETURN_ON_FAILURE(clGetPlatformInfo( + Platform, CL_PLATFORM_VERSION, PlatVerSize, PlatVer.data(), nullptr)); + + Version = oclv::OpenCLVersion(PlatVer); + if (!Version.isValid()) { + return UR_RESULT_ERROR_INVALID_PLATFORM; + } + + return UR_RESULT_SUCCESS; + } }; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 33561a02bc..9ef0bd4722 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -14,6 +14,8 @@ #include "device.hpp" #include "platform.hpp" +#include + static ur_result_t getDevicesFromProgram( 
ur_program_handle_t hProgram, std::unique_ptr> &DevicesInProgram) { @@ -36,22 +38,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( if (!hContext->DeviceCount || !hContext->Devices[0]->Platform) { return UR_RESULT_ERROR_INVALID_CONTEXT; } - cl_platform_id CurPlatform = hContext->Devices[0]->Platform->get(); + ur_platform_handle_t CurPlatform = hContext->Devices[0]->Platform; oclv::OpenCLVersion PlatVer; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getPlatformVersion(CurPlatform, PlatVer), phProgram); + CL_RETURN_ON_FAILURE_AND_SET_NULL(CurPlatform->getPlatformVersion(PlatVer), + phProgram); cl_int Err = CL_SUCCESS; if (PlatVer >= oclv::V2_1) { /* Make sure all devices support CL 2.1 or newer as well. */ for (ur_device_handle_t URDev : hContext->Devices) { - cl_device_id Dev = URDev->get(); oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getDeviceVersion(Dev, DevVer), phProgram); + CL_RETURN_ON_FAILURE_AND_SET_NULL(URDev->getDeviceVersion(DevVer), + phProgram); /* If the device does not support CL 2.1 or greater, we need to make sure * it supports the cl_khr_il_program extension. @@ -59,8 +60,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( if (DevVer < oclv::V2_1) { bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, - Supported), + URDev->checkDeviceExtensions({"cl_khr_il_program"}, Supported), phProgram); if (!Supported) { @@ -80,11 +80,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( * support the cl_khr_il_program extension. 
*/ for (ur_device_handle_t URDev : hContext->Devices) { - cl_device_id Dev = URDev->get(); bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::checkDeviceExtensions(Dev, {"cl_khr_il_program"}, - Supported), + URDev->checkDeviceExtensions({"cl_khr_il_program"}, Supported), phProgram); if (!Supported) { @@ -96,7 +94,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); ApiFuncT FuncPtr = reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - CurPlatform, "clCreateProgramWithILKHR")); + CurPlatform->get(), "clCreateProgramWithILKHR")); assert(FuncPtr != nullptr); cl_program Program = FuncPtr(hContext->get(), pIL, length, &Err); @@ -377,22 +375,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( return UR_RESULT_ERROR_INVALID_CONTEXT; } - std::unique_ptr> DevicesInCtx; - UR_RETURN_ON_FAILURE(cl_adapter::getDevicesFromContext(Ctx, DevicesInCtx)); + std::vector &DevicesInCtx = Ctx->Devices; - cl_platform_id CurPlatform = Ctx->Devices[0]->Platform->get(); + ur_platform_handle_t CurPlatform = Ctx->Devices[0]->Platform; oclv::OpenCLVersion PlatVer; - cl_adapter::getPlatformVersion(CurPlatform, PlatVer); + CurPlatform->getPlatformVersion(PlatVer); bool UseExtensionLookup = false; if (PlatVer < oclv::V2_2) { UseExtensionLookup = true; } else { - for (cl_device_id Dev : *DevicesInCtx) { + for (ur_device_handle_t Dev : DevicesInCtx) { oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(Dev, DevVer)); + UR_RETURN_ON_FAILURE(Dev->getDeviceVersion(DevVer)); if (DevVer < oclv::V2_2) { UseExtensionLookup = true; diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index c21006fe90..accb50555e 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -74,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t 
hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { - cl_platform_id CurPlatform = hDevice->Platform->get(); + ur_platform_handle_t CurPlatform = hDevice->Platform; cl_command_queue_properties CLProperties = pProperties ? convertURQueuePropertiesToCL(pProperties) : 0; @@ -85,8 +85,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; oclv::OpenCLVersion Version; - CL_RETURN_ON_FAILURE_AND_SET_NULL( - cl_adapter::getPlatformVersion(CurPlatform, Version), phQueue); + CL_RETURN_ON_FAILURE_AND_SET_NULL(CurPlatform->getPlatformVersion(Version), + phQueue); cl_int RetErr = CL_INVALID_OPERATION; From 76cfa1d9ade45399eb948c83b3e85155fbaccd77 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Wed, 17 Jan 2024 11:18:18 +0000 Subject: [PATCH 15/19] Add exception handling for bad_alloc --- source/adapters/opencl/command_buffer.cpp | 16 +- source/adapters/opencl/context.cpp | 20 +- source/adapters/opencl/context.hpp | 45 ++-- source/adapters/opencl/device.cpp | 12 +- source/adapters/opencl/device.hpp | 2 +- source/adapters/opencl/enqueue.cpp | 240 ++++++++++++++++------ source/adapters/opencl/event.cpp | 13 +- source/adapters/opencl/kernel.cpp | 18 +- source/adapters/opencl/kernel.hpp | 53 ++--- source/adapters/opencl/memory.cpp | 70 +++++-- source/adapters/opencl/memory.hpp | 27 ++- source/adapters/opencl/platform.cpp | 14 +- source/adapters/opencl/platform.hpp | 14 +- source/adapters/opencl/program.cpp | 67 ++++-- source/adapters/opencl/program.hpp | 31 +-- source/adapters/opencl/queue.cpp | 24 ++- source/adapters/opencl/queue.hpp | 49 +++-- source/adapters/opencl/sampler.cpp | 33 ++- source/adapters/opencl/usm.cpp | 72 +++++-- 19 files changed, 571 insertions(+), 249 deletions(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index df61e8b5b0..1c57246eca 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ 
b/source/adapters/opencl/command_buffer.cpp @@ -41,8 +41,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( auto URCommandBuffer = std::make_unique( Queue, hContext, CLCommandBuffer); *phCommandBuffer = URCommandBuffer.release(); - } catch (...) { + } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } CL_RETURN_ON_FAILURE(Res); @@ -359,9 +361,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index b68b8cedcd..1419bc82cd 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -24,12 +24,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( CLDevices[i] = phDevices[i]->get(); } - cl_context Ctx = clCreateContext( - nullptr, cl_adapter::cast(DeviceCount), CLDevices.data(), - nullptr, nullptr, cl_adapter::cast(&Ret)); - auto URContext = - std::make_unique(Ctx, DeviceCount, phDevices); - *phContext = URContext.release(); + try { + cl_context Ctx = clCreateContext( + nullptr, cl_adapter::cast(DeviceCount), CLDevices.data(), + nullptr, nullptr, cl_adapter::cast(&Ret)); + CL_RETURN_ON_FAILURE(Ret); + auto URContext = + std::make_unique(Ctx, DeviceCount, phDevices); + *phContext = URContext.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } + return mapCLErrorToUR(Ret); } diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index ab1d38f518..45089a8c79 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -31,27 +31,34 @@ struct ur_context_handle_t_ { static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices, ur_context_handle_t &Context) { - auto URContext = - std::make_unique(Ctx, DevCount, phDevices); - native_type &NativeContext = URContext->Context; - uint32_t &DeviceCount = URContext->DeviceCount; - if (!DeviceCount) { - CL_RETURN_ON_FAILURE( - clGetContextInfo(NativeContext, CL_CONTEXT_NUM_DEVICES, - sizeof(DeviceCount), &DeviceCount, nullptr)); - std::vector CLDevices(DeviceCount); - CL_RETURN_ON_FAILURE(clGetContextInfo(NativeContext, CL_CONTEXT_DEVICES, - sizeof(CLDevices), CLDevices.data(), - nullptr)); - URContext->Devices.resize(DeviceCount); - for (uint32_t i = 0; i < DeviceCount; i++) { - ur_native_handle_t NativeDevice = - reinterpret_cast(CLDevices[i]); - UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( - NativeDevice, nullptr, nullptr, &(URContext->Devices[i]))); + try { + auto URContext = + std::make_unique(Ctx, DevCount, phDevices); + native_type &NativeContext = URContext->Context; + uint32_t &DeviceCount = URContext->DeviceCount; + if (!DeviceCount) { + CL_RETURN_ON_FAILURE( + clGetContextInfo(NativeContext, CL_CONTEXT_NUM_DEVICES, + sizeof(DeviceCount), &DeviceCount, nullptr)); + std::vector CLDevices(DeviceCount); + CL_RETURN_ON_FAILURE(clGetContextInfo(NativeContext, CL_CONTEXT_DEVICES, + DeviceCount * sizeof(cl_device_id), + CLDevices.data(), nullptr)); + URContext->Devices.resize(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + ur_native_handle_t NativeDevice = + reinterpret_cast(CLDevices[i]); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( + NativeDevice, nullptr, nullptr, &(URContext->Devices[i]))); + } } +
Context = URContext.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } - Context = URContext.release(); + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 1a2d3d5991..addb353936 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -981,9 +981,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CLNumDevicesRet, CLSubDevices.data(), nullptr)); for (uint32_t i = 0; i < NumDevices; i++) { - auto URSubDevice = std::make_unique( - CLSubDevices[i], hDevice->Platform, hDevice); - phSubDevices[i] = URSubDevice.release(); + try { + auto URSubDevice = std::make_unique( + CLSubDevices[i], hDevice->Platform, hDevice); + phSubDevices[i] = URSubDevice.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } } diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index f538ce538b..ed2e1b76cf 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -71,4 +71,4 @@ struct ur_device_handle_t_ { return UR_RESULT_SUCCESS; } -}; \ No newline at end of file +}; diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index ce2481f1a6..7ffaefd733 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -46,9 +46,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch 
(...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -64,9 +70,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -82,9 +94,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -102,9 +120,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( hQueue->get(), hBuffer->get(), blockingRead, offset, size, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -122,9 +146,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( hQueue->get(), hBuffer->get(), blockingWrite, offset, size, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -150,9 +180,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -178,9 +214,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -199,9 +241,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( hQueue->get(), hBufferSrc->get(), hBufferDst->get(), srcOffset, dstOffset, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -226,9 +274,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -250,9 +304,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( hQueue->get(), hBuffer->get(), pPattern, patternSize, offset, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -292,9 +352,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( } if (phEvent) { - auto UREvent = std::make_unique( - WriteEvent, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = std::make_unique( + WriteEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } else { CL_RETURN_ON_FAILURE(clReleaseEvent(WriteEvent)); } @@ -318,9 +384,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( hQueue->get(), hImage->get(), blockingRead, Origin, Region, rowPitch, slicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -341,9 +413,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( hQueue->get(), hImage->get(), blockingWrite, Origin, Region, rowPitch, slicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -366,9 +444,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( hQueue->get(), hImageSrc->get(), hImageDst->get(), SrcOrigin, DstOrigin, Region, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -389,9 +473,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( numEventsInWaitList, CLWaitEvents.data(), &Event, &Err); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return mapCLErrorToUR(Err); } @@ -409,9 +499,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( pMappedPtr, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -439,9 +535,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( Res = F(hQueue->get(), hProgram->get(), name, blockingWrite, count, offset, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return mapCLErrorToUR(Res); } @@ -469,9 +571,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( Res = F(hQueue->get(), hProgram->get(), name, blockingRead, count, offset, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return mapCLErrorToUR(Res); } @@ -500,9 +608,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( blocking, pDst, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } } @@ -533,9 +647,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( blocking, pSrc, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } } diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index a41561f06c..cd825a3406 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -115,9 +115,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( const ur_event_native_properties_t *pProperties, ur_event_handle_t *phEvent) { cl_event NativeHandle = reinterpret_cast(hNativeEvent); - auto UREvent = - std::make_unique(NativeHandle, hContext, nullptr); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(NativeHandle, hContext, nullptr); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } + if (!pProperties || !pProperties->isNativeHandleOwned) { return urEventRetain(*phEvent); } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 51fa78dc2a..5295740af5 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -20,13 +20,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { + try { + cl_int CLResult; + cl_kernel Kernel = clCreateKernel(hProgram->get(), pKernelName, &CLResult); + CL_RETURN_ON_FAILURE(CLResult); + auto URKernel = std::make_unique(Kernel, hProgram, + hProgram->Context); + *phKernel = URKernel.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } - cl_int CLResult; - cl_kernel Kernel = clCreateKernel(hProgram->get(), pKernelName, &CLResult); - CL_RETURN_ON_FAILURE(CLResult); - auto URKernel = std::make_unique(Kernel, hProgram, - hProgram->Context); - *phKernel = URKernel.release(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp index 37e60e74b7..c1e452cadc 100644 --- a/source/adapters/opencl/kernel.hpp +++ b/source/adapters/opencl/kernel.hpp @@ -30,31 +30,38 @@ struct ur_kernel_handle_t_ { ur_program_handle_t Program, ur_context_handle_t Context, ur_kernel_handle_t &Kernel) { - auto URKernel = - std::make_unique(NativeKernel, Program, Context); - if (!Program) { - cl_program CLProgram; - CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, - sizeof(CLProgram), &CLProgram, + try { + auto URKernel = + std::make_unique(NativeKernel, Program, Context); + if (!Program) { + cl_program CLProgram; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, + sizeof(CLProgram), &CLProgram, + nullptr)); + ur_native_handle_t NativeProgram = + reinterpret_cast(CLProgram); + 
UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( + NativeProgram, nullptr, nullptr, &(URKernel->Program))); + } + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, + sizeof(CLContext), &CLContext, nullptr)); - ur_native_handle_t NativeProgram = - reinterpret_cast(CLProgram); - UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( - NativeProgram, nullptr, nullptr, &(URKernel->Program))); + if (!Context) { + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &(URKernel->Context))); + } else if (Context->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + Kernel = URKernel.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, - sizeof(CLContext), &CLContext, - nullptr)); - if (!Context) { - ur_native_handle_t NativeContext = - reinterpret_cast(CLContext); - UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &(URKernel->Context))); - } else if (Context->get() != CLContext) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - Kernel = URKernel.release(); + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 41d43ccc5a..9056188848 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -263,22 +263,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( } PropertiesIntel.push_back(0); - cl_mem Buffer = FuncPtr( - CLContext, PropertiesIntel.data(), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&RetErr)); - auto URMem = std::make_unique(Buffer, hContext); - *phBuffer = URMem.release(); + try { + cl_mem Buffer = FuncPtr( + CLContext, PropertiesIntel.data(), 
static_cast(flags), + size, pProperties->pHost, cl_adapter::cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hContext); + *phBuffer = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return mapCLErrorToUR(RetErr); } } void *HostPtr = pProperties ? pProperties->pHost : nullptr; - cl_mem Buffer = - clCreateBuffer(hContext->get(), static_cast(flags), size, - HostPtr, cl_adapter::cast(&RetErr)); - CL_RETURN_ON_FAILURE(RetErr); - auto URMem = std::make_unique(Buffer, hContext); - *phBuffer = URMem.release(); + try { + cl_mem Buffer = + clCreateBuffer(hContext->get(), static_cast(flags), size, + HostPtr, cl_adapter::cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hContext); + *phBuffer = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -294,12 +307,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( cl_image_desc ImageDesc = mapURImageDescToCL(pImageDesc); cl_map_flags MapFlags = convertURMemFlagsToCL(flags); - cl_mem Mem = - clCreateImage(hContext->get(), MapFlags, &ImageFormat, &ImageDesc, pHost, - cl_adapter::cast(&RetErr)); - CL_RETURN_ON_FAILURE(RetErr); - auto URMem = std::make_unique(Mem, hContext); - *phMem = URMem.release(); + try { + cl_mem Mem = + clCreateImage(hContext->get(), MapFlags, &ImageFormat, &ImageDesc, + pHost, cl_adapter::cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Mem, hContext); + *phMem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } @@ -323,12 +342,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( _cl_buffer_region BufferRegion; BufferRegion.origin = pRegion->origin; BufferRegion.size = pRegion->size; + try { + cl_mem Buffer = clCreateSubBuffer( + hBuffer->get(), static_cast(flags), BufferCreateType, + &BufferRegion, cl_adapter::cast(&RetErr)); + CL_RETURN_ON_FAILURE(RetErr); + auto URMem = std::make_unique(Buffer, hBuffer->Context); + *phMem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } - cl_mem Buffer = clCreateSubBuffer( - hBuffer->get(), static_cast(flags), BufferCreateType, - &BufferRegion, cl_adapter::cast(&RetErr)); - auto URMem = std::make_unique(Buffer, hBuffer->Context); - *phMem = URMem.release(); if (RetErr == CL_INVALID_VALUE) { size_t BufferSize = 0; CL_RETURN_ON_FAILURE(clGetMemObjectInfo( diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp index ff4cddec3e..48fe0429b6 100644 --- a/source/adapters/opencl/memory.hpp +++ b/source/adapters/opencl/memory.hpp @@ -26,17 +26,24 @@ struct ur_mem_handle_t_ { static ur_result_t makeWithNative(native_type NativeMem, ur_context_handle_t Ctx, ur_mem_handle_t &Mem) { - auto URMem = std::make_unique(NativeMem, Ctx); - if (!Ctx) { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo( - NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); - ur_native_handle_t NativeContext = - reinterpret_cast(CLContext); - UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &(URMem->Context))); + try { + auto URMem = std::make_unique(NativeMem, Ctx); + if (!Ctx) { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo( + NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + 
UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &(URMem->Context))); + } + Mem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } - Mem = URMem.release(); + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index a6d2659199..7f02da4468 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -86,10 +86,16 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, if (Result != CL_SUCCESS) { return Result; } - for (uint32_t i = 0; i < NumPlatforms; i++) { - auto URPlatform = - std::make_unique(CLPlatforms[i]); - URPlatforms.emplace_back(URPlatform.release()); + try { + for (uint32_t i = 0; i < NumPlatforms; i++) { + auto URPlatform = + std::make_unique(CLPlatforms[i]); + URPlatforms.emplace_back(URPlatform.release()); + } + } catch (std::bad_alloc &) { + return CL_OUT_OF_RESOURCES; + } catch (...) { + return CL_INVALID_PLATFORM; } return Result; }, diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 16fe833fab..b33b9a9365 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -49,10 +49,16 @@ struct ur_platform_handle_t_ { CL_RETURN_ON_FAILURE(clGetDeviceIDs( Platform, CL_DEVICE_TYPE_ALL, DeviceNum, CLDevices.data(), nullptr)); - Devices.resize(DeviceNum); - for (size_t i = 0; i < DeviceNum; i++) { - Devices[i] = - std::make_unique(CLDevices[i], this, nullptr); + try { + Devices.resize(DeviceNum); + for (size_t i = 0; i < DeviceNum; i++) { + Devices[i] = std::make_unique(CLDevices[i], this, + nullptr); + } + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; } } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 9ef0bd4722..381404ef34 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -24,9 +24,15 @@ static ur_result_t getDevicesFromProgram( return UR_RESULT_ERROR_INVALID_PROGRAM; } cl_uint DeviceCount = hProgram->Context->DeviceCount; - DevicesInProgram = std::make_unique>(DeviceCount); - for (uint32_t i = 0; i < DeviceCount; i++) { - (*DevicesInProgram)[i] = hProgram->Context->Devices[i]->get(); + try { + DevicesInProgram = std::make_unique>(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + (*DevicesInProgram)[i] = hProgram->Context->Devices[i]->get(); + } + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } return UR_RESULT_SUCCESS; } @@ -72,10 +78,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( cl_program Program = clCreateProgramWithIL(hContext->get(), pIL, length, &Err); CL_RETURN_ON_FAILURE(Err); - auto URProgram = std::make_unique(Program, hContext); - *phProgram = URProgram.release(); + try { + auto URProgram = + std::make_unique(Program, hContext); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } else { - /* If none of the devices conform with CL 2.1 or newer make sure they all * support the cl_khr_il_program extension. 
*/ @@ -97,10 +109,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( CurPlatform->get(), "clCreateProgramWithILKHR")); assert(FuncPtr != nullptr); - cl_program Program = FuncPtr(hContext->get(), pIL, length, &Err); - auto URProgram = std::make_unique(Program, hContext); - *phProgram = URProgram.release(); - + try { + cl_program Program = FuncPtr(hContext->get(), pIL, length, &Err); + CL_RETURN_ON_FAILURE(Err); + auto URProgram = + std::make_unique(Program, hContext); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } CL_RETURN_ON_FAILURE(Err); } @@ -116,12 +135,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( const size_t Lengths[1] = {size}; cl_int BinaryStatus[1]; cl_int CLResult; - cl_program Program = clCreateProgramWithBinary( - hContext->get(), cl_adapter::cast(1u), Devices, Lengths, - &pBinary, BinaryStatus, &CLResult); - - auto URProgram = std::make_unique(Program, hContext); - *phProgram = URProgram.release(); + try { + cl_program Program = clCreateProgramWithBinary( + hContext->get(), cl_adapter::cast(1u), Devices, Lengths, + &pBinary, BinaryStatus, &CLResult); + CL_RETURN_ON_FAILURE(CLResult); + auto URProgram = std::make_unique(Program, hContext); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } CL_RETURN_ON_FAILURE(BinaryStatus[0]); CL_RETURN_ON_FAILURE(CLResult); @@ -235,8 +260,14 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, hContext->get(), 0, nullptr, pOptions, cl_adapter::cast(count), CLPrograms.data(), nullptr, nullptr, &CLResult); CL_RETURN_ON_FAILURE(CLResult); - auto URProgram = std::make_unique(Program, hContext); - *phProgram = URProgram.release(); + try { + auto URProgram = std::make_unique(Program, hContext); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index 63a0f5bac0..a90b30e473 100644 --- a/source/adapters/opencl/program.hpp +++ b/source/adapters/opencl/program.hpp @@ -26,19 +26,26 @@ struct ur_program_handle_t_ { static ur_result_t makeWithNative(native_type NativeProg, ur_context_handle_t Context, ur_program_handle_t &Program) { - auto URProgram = - std::make_unique(NativeProg, Context); - if (!Context) { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, - sizeof(CLContext), &CLContext, - nullptr)); - ur_native_handle_t NativeContext = - reinterpret_cast(CLContext); - UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &(URProgram->Context))); + try { + auto URProgram = + std::make_unique(NativeProg, Context); + if (!Context) { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &(URProgram->Context))); + } + Program = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } 
catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } - Program = URProgram.release(); + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index accb50555e..9a9e741dcd 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -95,9 +95,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( clCreateCommandQueue(hContext->get(), hDevice->get(), CLProperties & SupportByOpenCL, &RetErr); CL_RETURN_ON_FAILURE(RetErr); - auto URQueue = - std::make_unique(Queue, hContext, hDevice); - *phQueue = URQueue.release(); + try { + auto URQueue = + std::make_unique(Queue, hContext, hDevice); + *phQueue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; } @@ -107,8 +114,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_command_queue Queue = clCreateCommandQueueWithProperties( hContext->get(), hDevice->get(), CreationFlagProperties, &RetErr); CL_RETURN_ON_FAILURE(RetErr); - auto URQueue = std::make_unique(Queue, hContext, hDevice); - *phQueue = URQueue.release(); + try { + auto URQueue = + std::make_unique(Queue, hContext, hDevice); + *phQueue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp index 80d620d1e4..8bbdb04d2a 100644 --- a/source/adapters/opencl/queue.hpp +++ b/source/adapters/opencl/queue.hpp @@ -27,28 +27,35 @@ struct ur_queue_handle_t_ { ur_context_handle_t Context, ur_device_handle_t Device, ur_queue_handle_t &Queue) { - auto URQueue = - std::make_unique(NativeQueue, Context, Device); - if (!Context) { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(NativeQueue, CL_QUEUE_CONTEXT, - sizeof(CLContext), &CLContext, - nullptr)); - ur_native_handle_t NativeContext = - reinterpret_cast(CLContext); - UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &(URQueue->Context))); + try { + auto URQueue = + std::make_unique(NativeQueue, Context, Device); + if (!Context) { + cl_context CLContext; + CL_RETURN_ON_FAILURE( + clGetCommandQueueInfo(NativeQueue, CL_QUEUE_CONTEXT, + sizeof(CLContext), &CLContext, nullptr)); + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &(URQueue->Context))); + } + if (!Device) { + cl_device_id CLDevice; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(NativeQueue, CL_QUEUE_DEVICE, + sizeof(CLDevice), &CLDevice, + nullptr)); + ur_native_handle_t NativeDevice = + reinterpret_cast(CLDevice); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( + NativeDevice, nullptr, nullptr, &(URQueue->Device))); + } + Queue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; } - if (!Device) { - cl_device_id CLDevice; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - NativeQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); - ur_native_handle_t NativeDevice = - reinterpret_cast(CLDevice); - UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( - NativeDevice, nullptr, nullptr, &(URQueue->Device))); - } - Queue = URQueue.release(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 532dcbeb8a..055e34038d 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -139,17 +139,24 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, ur_sampler_handle_t *phSampler) { // Initialize properties according to OpenCL 2.1 spec. - ur_result_t ErrorCode; + cl_int ErrorCode; cl_addressing_mode AddressingMode = ur2CLAddressingMode(pDesc->addressingMode); cl_filter_mode FilterMode = ur2CLFilterMode(pDesc->filterMode); + try { + // Always call OpenCL 1.0 API + cl_sampler Sampler = clCreateSampler( + hContext->get(), static_cast(pDesc->normalizedCoords), + AddressingMode, FilterMode, &ErrorCode); + CL_RETURN_ON_FAILURE(ErrorCode); + auto URSampler = std::make_unique(Sampler, hContext); + *phSampler = URSampler.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } - // Always call OpenCL 1.0 API - cl_sampler Sampler = clCreateSampler( - hContext->get(), static_cast(pDesc->normalizedCoords), - AddressingMode, FilterMode, cl_adapter::cast(&ErrorCode)); - auto URSampler = std::make_unique(Sampler, hContext); - *phSampler = URSampler.release(); return mapCLErrorToUR(ErrorCode); } @@ -201,9 +208,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { cl_sampler NativeHandle = reinterpret_cast(hNativeSampler); - auto URSampler = - std::make_unique(NativeHandle, hContext); - *phSampler = URSampler.release(); + try { + auto URSampler = + std::make_unique(NativeHandle, hContext); + *phSampler = URSampler.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } if (!pProperties || !pProperties->isNativeHandleOwned) { return clRetainSampler(NativeHandle); } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index ea541b0cb9..6e1917a034 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -252,9 +252,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( patternSize, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; } @@ -332,9 +338,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( CL_RETURN_ON_FAILURE(ClErr); } if (phEvent) { - auto UREvent = std::make_unique( - CopyEvent, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = std::make_unique( + CopyEvent, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } else { CL_RETURN_ON_FAILURE(clReleaseEvent(CopyEvent)); } @@ -365,9 +377,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } } @@ -388,9 +406,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; /* @@ -423,9 +447,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( CL_RETURN_ON_FAILURE( clEnqueueMarkerWithWaitList(hQueue->get(), 0, nullptr, &Event)); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } return UR_RESULT_SUCCESS; /* @@ -505,9 +535,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ClResult = clEnqueueBarrierWithWaitList(hQueue->get(), Events.size(), Events.data(), &Event); if (phEvent) { - auto UREvent = - std::make_unique(Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } } } for (const auto &E : Events) { From a15d72fba49cbf57d6e6dcc0c8f5dbea97929875 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Tue, 23 Jan 2024 09:21:55 +0000 Subject: [PATCH 16/19] Add reference counting to the UR layer --- source/adapters/opencl/context.cpp | 45 +++--------------- source/adapters/opencl/context.hpp | 18 ++++++- source/adapters/opencl/device.cpp | 23 +++++---- source/adapters/opencl/device.hpp | 10 +++- source/adapters/opencl/event.cpp | 73 ++++++++++++++++------------- source/adapters/opencl/event.hpp | 27 ++++++++++- source/adapters/opencl/kernel.cpp | 27 +++++++---- source/adapters/opencl/kernel.hpp | 29 +++++++++++- source/adapters/opencl/memory.cpp | 27 ++++++----- source/adapters/opencl/memory.hpp | 22 ++++++++- source/adapters/opencl/platform.cpp | 4 +- source/adapters/opencl/platform.hpp | 7 ++- source/adapters/opencl/program.cpp | 61 ++++++++++-------------- source/adapters/opencl/program.hpp | 22 ++++++++- source/adapters/opencl/queue.cpp | 23 +++++---- source/adapters/opencl/queue.hpp | 29 +++++++++++- source/adapters/opencl/sampler.cpp | 46 +++++++++++------- source/adapters/opencl/sampler.hpp | 21 ++++++++- 18 files changed, 335 insertions(+), 179 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 1419bc82cd..59b3fefd0a 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -41,32 +41,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( return mapCLErrorToUR(Ret); } -static cl_int mapURContextInfoToCL(ur_context_info_t URPropName) { - - cl_int CLPropName; - switch (URPropName) { - case UR_CONTEXT_INFO_NUM_DEVICES: - CLPropName = CL_CONTEXT_NUM_DEVICES; - break; - case UR_CONTEXT_INFO_DEVICES: - CLPropName = CL_CONTEXT_DEVICES; - break; - case UR_CONTEXT_INFO_REFERENCE_COUNT: - CLPropName = CL_CONTEXT_REFERENCE_COUNT; - break; - default: - CLPropName = -1; - } - - return CLPropName; -} - 
UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - const cl_int CLPropName = mapURContextInfoToCL(propName); switch (static_cast(propName)) { /* 2D USM memops are not supported. */ @@ -89,17 +68,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, return ReturnValue(&hContext->Devices[0], hContext->DeviceCount); } case UR_CONTEXT_INFO_REFERENCE_COUNT: { - size_t CheckPropSize = 0; - auto ClResult = clGetContextInfo(hContext->get(), CLPropName, propSize, - pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; - } - return UR_RESULT_SUCCESS; + return ReturnValue(hContext->getReferenceCount()); } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -108,16 +77,16 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { - - cl_int Ret = clReleaseContext(hContext->get()); - return mapCLErrorToUR(Ret); + if (hContext->decrementReferenceCount() == 0) { + delete hContext; + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { - - cl_int Ret = clRetainContext(hContext->get()); - return mapCLErrorToUR(Ret); + hContext->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 45089a8c79..8c40e3bcf5 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -19,21 +19,31 @@ struct ur_context_handle_t_ { native_type Context; std::vector Devices; uint32_t 
DeviceCount; + std::atomic RefCount = 0; ur_context_handle_t_(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices) : Context(Ctx), DeviceCount(DevCount) { for (uint32_t i = 0; i < DeviceCount; i++) { Devices.emplace_back(phDevices[i]); + urDeviceRetain(phDevices[i]); } + RefCount = 1; } + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } + static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices, ur_context_handle_t &Context) { try { auto URContext = std::make_unique(Ctx, DevCount, phDevices); + CL_RETURN_ON_FAILURE(clRetainContext(Ctx)); native_type &NativeContext = URContext->Context; uint32_t &DeviceCount = URContext->DeviceCount; if (!DeviceCount) { @@ -50,6 +60,7 @@ struct ur_context_handle_t_ { reinterpret_cast(CLDevices[i]); UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( NativeDevice, nullptr, nullptr, &(URContext->Devices[i]))); + UR_RETURN_ON_FAILURE(urDeviceRetain(URContext->Devices[i])); } } Context = URContext.release(); @@ -62,7 +73,12 @@ struct ur_context_handle_t_ { return UR_RESULT_SUCCESS; } - ~ur_context_handle_t_() {} + ~ur_context_handle_t_() { + for (uint32_t i = 0; i < DeviceCount; i++) { + urDeviceRelease(Devices[i]); + } + clReleaseContext(Context); + } native_type get() { return Context; } }; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index addb353936..88bc25b803 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -732,6 +732,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, {"cl_intel_program_scope_host_pipe"}, Supported)); return ReturnValue(Supported); } + case UR_DEVICE_INFO_REFERENCE_COUNT: { + return ReturnValue(hDevice->getReferenceCount()); + } case UR_DEVICE_INFO_QUEUE_PROPERTIES: case 
UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: @@ -802,7 +805,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_MAX_SAMPLERS: case UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE: case UR_DEVICE_INFO_MAX_CONSTANT_ARGS: - case UR_DEVICE_INFO_REFERENCE_COUNT: case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE: case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: @@ -980,7 +982,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CL_RETURN_ON_FAILURE(clCreateSubDevices(hDevice->get(), CLProperties.data(), CLNumDevicesRet, CLSubDevices.data(), nullptr)); - for (uint32_t i = 0; i < NumDevices; i++) { + for (uint32_t i = 0; i < std::min(CLNumDevicesRet, NumDevices); i++) { try { auto URSubDevice = std::make_unique( CLSubDevices[i], hDevice->Platform, hDevice); @@ -996,19 +998,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( return UR_RESULT_SUCCESS; } +// Root devices ref count are unchanged through out the program lifetime. UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { + if (hDevice->ParentDevice) { + hDevice->incrementReferenceCount(); + } - cl_int Result = clRetainDevice(hDevice->get()); - - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; } +// Root devices ref count are unchanged through out the program lifetime. 
UR_APIEXPORT ur_result_t UR_APICALL urDeviceRelease(ur_device_handle_t hDevice) { + if (hDevice->ParentDevice && hDevice->decrementReferenceCount() == 0) { + delete hDevice; + } - cl_int Result = clReleaseDevice(hDevice->get()); - - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( @@ -1054,7 +1060,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( cl_device_id DeviceId = hDevice->get(); // TODO: Cache OpenCL version for each device and platform - auto RetErr = hDevice->getDeviceVersion(DevVer); CL_RETURN_ON_FAILURE(RetErr); diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index ed2e1b76cf..3bea8b6b94 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -18,10 +18,12 @@ struct ur_device_handle_t_ { ur_platform_handle_t Platform; cl_device_type Type = 0; ur_device_handle_t ParentDevice = nullptr; + std::atomic RefCount = 0; ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat, ur_device_handle_t Parent) : Device(Dev), Platform(Plat), ParentDevice(Parent) { + RefCount = 1; if (Parent) { Type = Parent->Type; } else { @@ -30,7 +32,13 @@ struct ur_device_handle_t_ { } } - ~ur_device_handle_t_() {} + ~ur_device_handle_t_() { clReleaseDevice(Device); } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } native_type get() { return Device; } diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index cd825a3406..55510d4920 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -126,7 +126,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( } if (!pProperties || !pProperties->isNativeHandleOwned) { - return urEventRetain(*phEvent); + 
CL_RETURN_ON_FAILURE(clRetainEvent(NativeHandle)); } return UR_RESULT_SUCCESS; } @@ -137,14 +137,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { - cl_int RetErr = clReleaseEvent(hEvent->get()); - CL_RETURN_ON_FAILURE(RetErr); + if (hEvent->decrementReferenceCount() == 0) { + delete hEvent; + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - cl_int RetErr = clRetainEvent(hEvent->get()); - CL_RETURN_ON_FAILURE(RetErr); + hEvent->incrementReferenceCount(); return UR_RESULT_SUCCESS; } @@ -167,42 +167,51 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, cl_event_info CLEventInfo = convertUREventInfoToCL(propName); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - if (CLEventInfo == CL_EVENT_CONTEXT) { + switch (propName) { + case UR_EVENT_INFO_CONTEXT: { return ReturnValue(hEvent->Context); } - if (CLEventInfo == CL_EVENT_COMMAND_QUEUE) { + case UR_EVENT_INFO_COMMAND_QUEUE: { return ReturnValue(hEvent->Queue); } - size_t CheckPropSize = 0; - cl_int RetErr = clGetEventInfo(hEvent->get(), CLEventInfo, propSize, - pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - CL_RETURN_ON_FAILURE(RetErr); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + case UR_EVENT_INFO_REFERENCE_COUNT: { + return ReturnValue(hEvent->getReferenceCount()); } + default: { + size_t CheckPropSize = 0; + cl_int RetErr = clGetEventInfo(hEvent->get(), CLEventInfo, propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(RetErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } - if (pPropValue) { - if (propName == UR_EVENT_INFO_COMMAND_TYPE) { - *reinterpret_cast(pPropValue) = convertCLCommandTypeToUR( - 
*reinterpret_cast(pPropValue)); - } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, - * change it to CL_SUBMITTED. sycl::info::event::event_command_status has - * no equivalent to CL_QUEUED. - * - * FIXME UR Port: This should not be part of the UR adapter. Since - * PI_QUEUED exists, SYCL RT should be changed to handle this situation. - * In addition, SYCL RT is relying on PI_QUEUED status to make sure that - * the queues are flushed. */ - const auto param_value_int = static_cast(pPropValue); - if (*param_value_int == UR_EVENT_STATUS_QUEUED) { - *param_value_int = UR_EVENT_STATUS_SUBMITTED; + if (pPropValue) { + if (propName == UR_EVENT_INFO_COMMAND_TYPE) { + *reinterpret_cast(pPropValue) = + convertCLCommandTypeToUR( + *reinterpret_cast(pPropValue)); + } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, + * change it to CL_SUBMITTED. sycl::info::event::event_command_status + * has no equivalent to CL_QUEUED. + * + * FIXME UR Port: This should not be part of the UR adapter. Since + * PI_QUEUED exists, SYCL RT should be changed to handle this situation. + * In addition, SYCL RT is relying on PI_QUEUED status to make sure that + * the queues are flushed. 
*/ + const auto param_value_int = + static_cast(pPropValue); + if (*param_value_int == UR_EVENT_STATUS_QUEUED) { + *param_value_int = UR_EVENT_STATUS_SUBMITTED; + } } } } + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/event.hpp b/source/adapters/opencl/event.hpp index 7574e218a2..70b577eb7e 100644 --- a/source/adapters/opencl/event.hpp +++ b/source/adapters/opencl/event.hpp @@ -18,12 +18,35 @@ struct ur_event_handle_t_ { native_type Event; ur_context_handle_t Context; ur_queue_handle_t Queue; + std::atomic RefCount = 0; ur_event_handle_t_(native_type Event, ur_context_handle_t Ctx, ur_queue_handle_t Queue) - : Event(Event), Context(Ctx), Queue(Queue) {} + : Event(Event), Context(Ctx), Queue(Queue) { + RefCount = 1; + if (Context) { + urContextRetain(Context); + } + if (Queue) { + urQueueRetain(Queue); + } + } - ~ur_event_handle_t_() {} + ~ur_event_handle_t_() { + if (Context) { + urContextRelease(Context); + } + if (Queue) { + urQueueRelease(Queue); + } + clReleaseEvent(Event); + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } native_type get() { return Event; } }; diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 5295740af5..6e2e13e5f1 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -87,7 +87,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, // We need this little bit of ugliness because the UR NUM_ARGS property is // size_t whereas the CL one is cl_uint. 
We should consider changing that see // #1038 - if (propName == UR_KERNEL_INFO_NUM_ARGS) { + switch (propName) { + case UR_KERNEL_INFO_NUM_ARGS: { if (pPropSizeRet) *pPropSizeRet = sizeof(size_t); cl_uint NumArgs = 0; @@ -99,11 +100,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, return UR_RESULT_ERROR_INVALID_SIZE; *static_cast(pPropValue) = static_cast(NumArgs); } - } else if (propName == UR_KERNEL_INFO_PROGRAM) { + return UR_RESULT_SUCCESS; + } + case UR_KERNEL_INFO_PROGRAM: { return ReturnValue(hKernel->Program); - } else if (propName == UR_KERNEL_INFO_CONTEXT) { + } + case UR_KERNEL_INFO_CONTEXT: { return ReturnValue(hKernel->Context); - } else { + } + case UR_KERNEL_INFO_REFERENCE_COUNT: { + return ReturnValue(hKernel->getReferenceCount()); + } + default: { size_t CheckPropSize = 0; cl_int ClResult = clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), propSize, @@ -116,6 +124,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, *pPropSizeRet = CheckPropSize; } } + } return UR_RESULT_SUCCESS; } @@ -263,13 +272,15 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clRetainKernel(hKernel->get())); + hKernel->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clReleaseKernel(hKernel->get())); + if (hKernel->decrementReferenceCount() == 0) { + delete hKernel; + } return UR_RESULT_SUCCESS; } @@ -388,7 +399,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, ur_program_handle_t hProgram, - const ur_kernel_native_properties_t *pProperties, + [[maybe_unused]] const ur_kernel_native_properties_t 
*pProperties, ur_kernel_handle_t *phKernel) { cl_kernel NativeHandle = reinterpret_cast(hNativeKernel); @@ -396,7 +407,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( NativeHandle, hProgram, hContext, *phKernel)); if (!pProperties || !pProperties->isNativeHandleOwned) { - return urKernelRetain(*phKernel); + CL_RETURN_ON_FAILURE(clRetainKernel((*phKernel)->get())); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp index c1e452cadc..f4ebb3e61b 100644 --- a/source/adapters/opencl/kernel.hpp +++ b/source/adapters/opencl/kernel.hpp @@ -19,12 +19,35 @@ struct ur_kernel_handle_t_ { native_type Kernel; ur_program_handle_t Program; ur_context_handle_t Context; + std::atomic RefCount = 0; ur_kernel_handle_t_(native_type Kernel, ur_program_handle_t Program, ur_context_handle_t Context) - : Kernel(Kernel), Program(Program), Context(Context) {} + : Kernel(Kernel), Program(Program), Context(Context) { + RefCount = 1; + if (Program) { + urProgramRetain(Program); + } + if (Context) { + urContextRetain(Context); + } + } + + ~ur_kernel_handle_t_() { + clReleaseKernel(Kernel); + if (Program) { + urProgramRelease(Program); + } + if (Context) { + urContextRelease(Context); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } - ~ur_kernel_handle_t_() {} + uint32_t getReferenceCount() const noexcept { return RefCount; } static ur_result_t makeWithNative(native_type NativeKernel, ur_program_handle_t Program, @@ -42,6 +65,7 @@ struct ur_kernel_handle_t_ { reinterpret_cast(CLProgram); UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( NativeProgram, nullptr, nullptr, &(URKernel->Program))); + UR_RETURN_ON_FAILURE(urProgramRetain(URKernel->Program)); } cl_context CLContext; CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, @@ -52,6 +76,7 @@ struct ur_kernel_handle_t_ { 
reinterpret_cast(CLContext); UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( NativeContext, 0, nullptr, nullptr, &(URKernel->Context))); + UR_RETURN_ON_FAILURE(urContextRetain(URKernel->Context)); } else if (Context->get() != CLContext) { return UR_RESULT_ERROR_INVALID_CONTEXT; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 9056188848..2de2287c91 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -227,7 +227,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { cl_int RetErr = CL_INVALID_OPERATION; - UR_RETURN_ON_FAILURE(urContextRetain(hContext)); if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported @@ -346,6 +345,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( cl_mem Buffer = clCreateSubBuffer( hBuffer->get(), static_cast(flags), BufferCreateType, &BufferRegion, cl_adapter::cast(&RetErr)); + if (RetErr == CL_INVALID_VALUE) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(hBuffer->get(), CL_MEM_SIZE, + sizeof(BufferSize), &BufferSize, + nullptr)); + if (BufferRegion.size + BufferRegion.origin > BufferSize) + return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + } CL_RETURN_ON_FAILURE(RetErr); auto URMem = std::make_unique(Buffer, hBuffer->Context); *phMem = URMem.release(); @@ -354,14 +361,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( } catch (...) 
{ return UR_RESULT_ERROR_UNKNOWN; } - - if (RetErr == CL_INVALID_VALUE) { - size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo( - hBuffer->get(), CL_MEM_SIZE, sizeof(BufferSize), &BufferSize, nullptr)); - if (BufferRegion.size + BufferRegion.origin > BufferSize) - return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; - } return mapCLErrorToUR(RetErr); } @@ -377,7 +376,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( UR_RETURN_ON_FAILURE( ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); if (!pProperties || !pProperties->isNativeHandleOwned) { - return urMemRetain(*phMem); + CL_RETURN_ON_FAILURE(clRetainMemObject((*phMem)->get())); } return UR_RESULT_SUCCESS; } @@ -391,7 +390,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( UR_RETURN_ON_FAILURE( ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); if (!pProperties || !pProperties->isNativeHandleOwned) { - return urMemRetain(*phMem); + CL_RETURN_ON_FAILURE(clRetainMemObject((*phMem)->get())); } return UR_RESULT_SUCCESS; } @@ -449,11 +448,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, } UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { - CL_RETURN_ON_FAILURE(clRetainMemObject(hMem->get())); + hMem->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { - CL_RETURN_ON_FAILURE(clReleaseMemObject(hMem->get())); + if (hMem->decrementReferenceCount() == 0) { + delete hMem; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp index 48fe0429b6..a91001d1a6 100644 --- a/source/adapters/opencl/memory.hpp +++ b/source/adapters/opencl/memory.hpp @@ -17,11 +17,28 @@ struct ur_mem_handle_t_ { using native_type = cl_mem; native_type Memory; ur_context_handle_t Context; + std::atomic RefCount = 0; ur_mem_handle_t_(native_type Mem, ur_context_handle_t Ctx) 
- : Memory(Mem), Context(Ctx) {} + : Memory(Mem), Context(Ctx) { + RefCount = 1; + if (Context) { + urContextRetain(Context); + } + } + + ~ur_mem_handle_t_() { + clReleaseMemObject(Memory); + if (Context) { + urContextRelease(Context); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } - ~ur_mem_handle_t_() {} + uint32_t getReferenceCount() const noexcept { return RefCount; } static ur_result_t makeWithNative(native_type NativeMem, ur_context_handle_t Ctx, @@ -36,6 +53,7 @@ struct ur_mem_handle_t_ { reinterpret_cast(CLContext); UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( NativeContext, 0, nullptr, nullptr, &(URMem->Context))); + UR_RETURN_ON_FAILURE(urContextRetain(URMem->Context)); } Mem = URMem.release(); } catch (std::bad_alloc &) { diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 7f02da4468..7460491eb8 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -68,7 +68,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - static std::vector URPlatforms; + static std::vector> URPlatforms; static std::once_flag InitFlag; static uint32_t NumPlatforms = 0; cl_int Result = CL_SUCCESS; @@ -113,7 +113,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } if (NumEntries && phPlatforms) { for (uint32_t i = 0; i < NumEntries; i++) { - phPlatforms[i] = URPlatforms[i]; + phPlatforms[i] = &(*URPlatforms[i]); } } return mapCLErrorToUR(Result); diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index b33b9a9365..09cc7742f5 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -21,7 +21,12 @@ struct ur_platform_handle_t_ { ur_platform_handle_t_(native_type Plat) : Platform(Plat) 
{} - ~ur_platform_handle_t_() {} + ~ur_platform_handle_t_() { + for (auto &Dev : Devices) { + Dev.reset(); + } + Devices.clear(); + } template ur_result_t getExtFunc(T CachedExtFunc, const char *FuncName, T *Fptr) { diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 381404ef34..11ebaec443 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -16,27 +16,6 @@ #include -static ur_result_t getDevicesFromProgram( - ur_program_handle_t hProgram, - std::unique_ptr> &DevicesInProgram) { - - if (!hProgram->Context || !hProgram->Context->DeviceCount) { - return UR_RESULT_ERROR_INVALID_PROGRAM; - } - cl_uint DeviceCount = hProgram->Context->DeviceCount; - try { - DevicesInProgram = std::make_unique>(DeviceCount); - for (uint32_t i = 0; i < DeviceCount; i++) { - (*DevicesInProgram)[i] = hProgram->Context->Devices[i]->get(); - } - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( ur_context_handle_t hContext, const void *pIL, size_t length, const ur_program_properties_t *, ur_program_handle_t *phProgram) { @@ -157,12 +136,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - std::unique_ptr> DevicesInProgram; - CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + uint32_t DeviceCount = hProgram->Context->DeviceCount; + std::vector CLDevicesInProgram(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + CLDevicesInProgram[i] = hProgram->Context->Devices[i]->get(); + } - CL_RETURN_ON_FAILURE(clCompileProgram( - hProgram->get(), DevicesInProgram->size(), DevicesInProgram->data(), - pOptions, 0, nullptr, nullptr, nullptr, nullptr)); + CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->get(), DeviceCount, 
+ CLDevicesInProgram.data(), pOptions, 0, + nullptr, nullptr, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -216,6 +198,9 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, return ReturnValue(&hProgram->Context->Devices[0], hProgram->Context->DeviceCount); } + case UR_PROGRAM_INFO_REFERENCE_COUNT: { + return ReturnValue(hProgram->getReferenceCount()); + } default: { size_t CheckPropSize = 0; auto ClResult = clGetProgramInfo(hProgram->get(), CLPropName, propSize, @@ -237,12 +222,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - std::unique_ptr> DevicesInProgram; - CL_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - - CL_RETURN_ON_FAILURE(clBuildProgram(hProgram->get(), DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, - nullptr, nullptr)); + uint32_t DeviceCount = hProgram->Context->DeviceCount; + std::vector CLDevicesInProgram(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + CLDevicesInProgram[i] = hProgram->Context->Devices[i]->get(); + } + CL_RETURN_ON_FAILURE( + clBuildProgram(hProgram->get(), cl_adapter::cast(DeviceCount), + CLDevicesInProgram.data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -360,15 +347,15 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain(ur_program_handle_t hProgram) { - - CL_RETURN_ON_FAILURE(clRetainProgram(hProgram->get())); + hProgram->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { - - CL_RETURN_ON_FAILURE(clReleaseProgram(hProgram->get())); + if (hProgram->decrementReferenceCount() == 0) { + delete hProgram; + } return UR_RESULT_SUCCESS; } @@ -388,7 +375,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( UR_RETURN_ON_FAILURE( 
ur_program_handle_t_::makeWithNative(NativeHandle, hContext, *phProgram)); if (!pProperties || !pProperties->isNativeHandleOwned) { - return urProgramRetain(*phProgram); + CL_RETURN_ON_FAILURE(clRetainProgram((*phProgram)->get())); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index a90b30e473..85ddfde6da 100644 --- a/source/adapters/opencl/program.hpp +++ b/source/adapters/opencl/program.hpp @@ -17,11 +17,28 @@ struct ur_program_handle_t_ { using native_type = cl_program; native_type Program; ur_context_handle_t Context; + std::atomic RefCount = 0; ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx) - : Program(Prog), Context(Ctx) {} + : Program(Prog), Context(Ctx) { + RefCount = 1; + if (Context) { + urContextRetain(Context); + } + } + + ~ur_program_handle_t_() { + clReleaseProgram(Program); + if (Context) { + urContextRelease(Context); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } - ~ur_program_handle_t_() {} + uint32_t getReferenceCount() const noexcept { return RefCount; } static ur_result_t makeWithNative(native_type NativeProg, ur_context_handle_t Context, @@ -38,6 +55,7 @@ struct ur_program_handle_t_ { reinterpret_cast(CLContext); UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( NativeContext, 0, nullptr, nullptr, &(URProgram->Context))); + UR_RETURN_ON_FAILURE(urContextRetain(URProgram->Context)); } Program = URProgram.release(); } catch (std::bad_alloc &) { diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 9a9e741dcd..605ef2fcf2 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -137,23 +137,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, } cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); UrReturnHelper ReturnValue(propSize, pPropValue, 
pPropSizeRet); - - if (propName == UR_QUEUE_INFO_CONTEXT) { + switch (propName) { + case UR_QUEUE_INFO_CONTEXT: { return ReturnValue(hQueue->Context); } - if (propName == UR_QUEUE_INFO_DEVICE) { + case UR_QUEUE_INFO_DEVICE: { return ReturnValue(hQueue->Device); } // Unfortunately the size of cl_bitfield (unsigned long) doesn't line up with // our enums (forced to be sizeof(uint32_t)) so this needs special handling. - if (propName == UR_QUEUE_INFO_FLAGS) { + case UR_QUEUE_INFO_FLAGS: { cl_command_queue_properties QueueProperties = 0; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( hQueue->get(), CLCommandQueueInfo, sizeof(QueueProperties), &QueueProperties, nullptr)); return ReturnValue(mapCLQueuePropsToUR(QueueProperties)); - } else { + } + case UR_QUEUE_INFO_REFERENCE_COUNT: { + return ReturnValue(hQueue->getReferenceCount()); + } + default: { size_t CheckPropSize = 0; cl_int RetErr = clGetCommandQueueInfo(hQueue->get(), CLCommandQueueInfo, propSize, pPropValue, &CheckPropSize); @@ -165,6 +169,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, *pPropSizeRet = CheckPropSize; } } + } return UR_RESULT_SUCCESS; } @@ -205,13 +210,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { } UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - cl_int RetErr = clRetainCommandQueue(hQueue->get()); - CL_RETURN_ON_FAILURE(RetErr); + hQueue->incrementReferenceCount(); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - cl_int RetErr = clReleaseCommandQueue(hQueue->get()); - CL_RETURN_ON_FAILURE(RetErr); + if (hQueue->decrementReferenceCount() == 0) { + delete hQueue; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp index 8bbdb04d2a..63d6a6ea50 100644 --- a/source/adapters/opencl/queue.hpp +++ b/source/adapters/opencl/queue.hpp @@ -18,10 +18,19 @@ struct ur_queue_handle_t_ { 
native_type Queue; ur_context_handle_t Context; ur_device_handle_t Device; + std::atomic RefCount = 0; ur_queue_handle_t_(native_type Queue, ur_context_handle_t Ctx, ur_device_handle_t Dev) - : Queue(Queue), Context(Ctx), Device(Dev) {} + : Queue(Queue), Context(Ctx), Device(Dev) { + RefCount = 1; + if (Device) { + urDeviceRetain(Device); + } + if (Context) { + urContextRetain(Context); + } + } static ur_result_t makeWithNative(native_type NativeQueue, ur_context_handle_t Context, @@ -39,6 +48,7 @@ struct ur_queue_handle_t_ { reinterpret_cast(CLContext); UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( NativeContext, 0, nullptr, nullptr, &(URQueue->Context))); + UR_RETURN_ON_FAILURE(urContextRetain(URQueue->Context)); } if (!Device) { cl_device_id CLDevice; @@ -49,6 +59,7 @@ struct ur_queue_handle_t_ { reinterpret_cast(CLDevice); UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( NativeDevice, nullptr, nullptr, &(URQueue->Device))); + UR_RETURN_ON_FAILURE(urDeviceRetain(URQueue->Device)); } Queue = URQueue.release(); } catch (std::bad_alloc &) { @@ -59,7 +70,21 @@ struct ur_queue_handle_t_ { return UR_RESULT_SUCCESS; } - ~ur_queue_handle_t_() {} + ~ur_queue_handle_t_() { + clReleaseCommandQueue(Queue); + if (Device) { + urDeviceRelease(Device); + } + if (Context) { + urContextRelease(Context); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } native_type get() { return Queue; } }; diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 055e34038d..0f88bf3a29 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -168,34 +168,45 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, sizeof(ur_sampler_addressing_mode_t)); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - if 
(SamplerInfo == CL_SAMPLER_CONTEXT) { + switch (propName) { + case UR_SAMPLER_INFO_CONTEXT: { return ReturnValue(hSampler->Context); } - size_t CheckPropSize = 0; - ur_result_t Err = mapCLErrorToUR(clGetSamplerInfo( - hSampler->get(), SamplerInfo, propSize, pPropValue, &CheckPropSize)); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + case UR_SAMPLER_INFO_REFERENCE_COUNT: { + return ReturnValue(hSampler->getReferenceCount()); + } + default: { + size_t CheckPropSize = 0; + ur_result_t Err = mapCLErrorToUR(clGetSamplerInfo( + hSampler->get(), SamplerInfo, propSize, pPropValue, &CheckPropSize)); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(Err); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + + // Convert OpenCL returns to UR + cl2URSamplerInfoValue(SamplerInfo, pPropValue); } - CL_RETURN_ON_FAILURE(Err); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; } - - // Convert OpenCL returns to UR - cl2URSamplerInfoValue(SamplerInfo, pPropValue); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { - return mapCLErrorToUR(clRetainSampler(hSampler->get())); + hSampler->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { - return mapCLErrorToUR(clReleaseSampler(hSampler->get())); + if (hSampler->decrementReferenceCount() == 0) { + delete hSampler; + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( @@ -206,7 +217,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, - const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { + const ur_sampler_native_properties_t *pProperties, + 
ur_sampler_handle_t *phSampler) { cl_sampler NativeHandle = reinterpret_cast(hNativeSampler); try { auto URSampler = @@ -217,8 +229,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( } catch (...) { return UR_RESULT_ERROR_UNKNOWN; } + if (!pProperties || !pProperties->isNativeHandleOwned) { - return clRetainSampler(NativeHandle); + CL_RETURN_ON_FAILURE(clRetainSampler(NativeHandle)); } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.hpp b/source/adapters/opencl/sampler.hpp index 8bee3c50e4..c3f01e124b 100644 --- a/source/adapters/opencl/sampler.hpp +++ b/source/adapters/opencl/sampler.hpp @@ -17,11 +17,28 @@ struct ur_sampler_handle_t_ { using native_type = cl_sampler; native_type Sampler; ur_context_handle_t Context; + std::atomic RefCount = 0; ur_sampler_handle_t_(native_type Sampler, ur_context_handle_t Ctx) - : Sampler(Sampler), Context(Ctx) {} + : Sampler(Sampler), Context(Ctx) { + RefCount = 1; + if (Context) { + urContextRetain(Context); + } + } - ~ur_sampler_handle_t_() {} + ~ur_sampler_handle_t_() { + clReleaseSampler(Sampler); + if (Context) { + urContextRelease(Context); + } + } + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + + uint32_t getReferenceCount() const noexcept { return RefCount; } native_type get() { return Sampler; } }; From 51dacd4d4781d94fd61bf068a74fa101bef9e919 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Thu, 25 Jan 2024 13:06:48 +0000 Subject: [PATCH 17/19] Modify handling of CreateWithNative --- source/adapters/opencl/context.cpp | 12 ++++--- source/adapters/opencl/context.hpp | 41 ++++++++++----------- source/adapters/opencl/device.cpp | 8 +++++ source/adapters/opencl/event.hpp | 8 ++--- source/adapters/opencl/kernel.cpp | 2 +- source/adapters/opencl/kernel.hpp | 51 ++++++++++---------------- source/adapters/opencl/memory.cpp | 6 ++-- source/adapters/opencl/memory.hpp | 29 
+++++++-------- source/adapters/opencl/platform.hpp | 1 - source/adapters/opencl/program.cpp | 11 +++--- source/adapters/opencl/program.hpp | 30 +++++++--------- source/adapters/opencl/queue.cpp | 7 ++-- source/adapters/opencl/queue.hpp | 56 ++++++++++++----------------- source/adapters/opencl/sampler.hpp | 8 ++--- 14 files changed, 119 insertions(+), 151 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 59b3fefd0a..1382ec3c47 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -97,17 +97,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( - ur_native_handle_t hNativeContext, uint32_t numDevices, const ur_device_handle_t *phDevices, - const ur_context_native_properties_t *pProperties, ur_context_handle_t *phContext) { + ur_native_handle_t hNativeContext, uint32_t numDevices, + const ur_device_handle_t *phDevices, + const ur_context_native_properties_t *pProperties, + ur_context_handle_t *phContext) { cl_context NativeHandle = reinterpret_cast(hNativeContext); - auto URContext = std::make_unique( - NativeHandle, numDevices, phDevices); UR_RETURN_ON_FAILURE(ur_context_handle_t_::makeWithNative( NativeHandle, numDevices, phDevices, *phContext)); + if (!pProperties || !pProperties->isNativeHandleOwned) { - return clRetainContext(NativeHandle); + CL_RETURN_ON_FAILURE(clRetainContext(NativeHandle)); } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 8c40e3bcf5..555636b1b8 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -40,29 +40,28 @@ struct ur_context_handle_t_ { static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices, ur_context_handle_t &Context) { + if (!phDevices) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } try { - 
auto URContext = - std::make_unique(Ctx, DevCount, phDevices); - CL_RETURN_ON_FAILURE(clRetainContext(Ctx)); - native_type &NativeContext = URContext->Context; - uint32_t &DeviceCount = URContext->DeviceCount; - if (!DeviceCount) { - CL_RETURN_ON_FAILURE( - clGetContextInfo(NativeContext, CL_CONTEXT_NUM_DEVICES, - sizeof(DeviceCount), &DeviceCount, nullptr)); - std::vector CLDevices(DeviceCount); - CL_RETURN_ON_FAILURE(clGetContextInfo(NativeContext, CL_CONTEXT_DEVICES, - sizeof(CLDevices), - CLDevices.data(), nullptr)); - URContext->Devices.resize(DeviceCount); - for (uint32_t i = 0; i < DeviceCount; i++) { - ur_native_handle_t NativeDevice = - reinterpret_cast(CLDevices[i]); - UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( - NativeDevice, nullptr, nullptr, &(URContext->Devices[i]))); - UR_RETURN_ON_FAILURE(urDeviceRetain(URContext->Devices[i])); + uint32_t CLDeviceCount; + CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_NUM_DEVICES, + sizeof(CLDeviceCount), + &CLDeviceCount, nullptr)); + std::vector CLDevices(CLDeviceCount); + CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_DEVICES, + CLDeviceCount * sizeof(cl_device_id), + CLDevices.data(), nullptr)); + if (DevCount != CLDeviceCount) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + for (uint32_t i = 0; i < DevCount; i++) { + if (phDevices[i]->get() != CLDevices[i]) { + return UR_RESULT_ERROR_INVALID_CONTEXT; } } + auto URContext = + std::make_unique(Ctx, DevCount, phDevices); Context = URContext.release(); } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -81,4 +80,6 @@ struct ur_context_handle_t_ { } native_type get() { return Context; } + + const std::vector &getDevices() { return Devices; } }; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 88bc25b803..8d323c841a 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -988,8 +988,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CLSubDevices[i],
hDevice->Platform, hDevice); phSubDevices[i] = URSubDevice.release(); } catch (std::bad_alloc &) { + // Delete all the successfully created subdevices before the failed one. + for (uint32_t j = 0; j < i; j++) { + delete phSubDevices[j]; + } return UR_RESULT_ERROR_OUT_OF_RESOURCES; } catch (...) { + // Delete all the successfully created subdevices before the failed one. + for (uint32_t j = 0; j < i; j++) { + delete phSubDevices[j]; + } return UR_RESULT_ERROR_UNKNOWN; } } diff --git a/source/adapters/opencl/event.hpp b/source/adapters/opencl/event.hpp index 70b577eb7e..f7f17e7e1e 100644 --- a/source/adapters/opencl/event.hpp +++ b/source/adapters/opencl/event.hpp @@ -24,18 +24,14 @@ struct ur_event_handle_t_ { ur_queue_handle_t Queue) : Event(Event), Context(Ctx), Queue(Queue) { RefCount = 1; - if (Context) { - urContextRetain(Context); - } + urContextRetain(Context); if (Queue) { urQueueRetain(Queue); } } ~ur_event_handle_t_() { - if (Context) { - urContextRelease(Context); - } + urContextRelease(Context); if (Queue) { urQueueRelease(Queue); } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 6e2e13e5f1..899af56fe7 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -407,7 +407,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( NativeHandle, hProgram, hContext, *phKernel)); if (!pProperties || !pProperties->isNativeHandleOwned) { - CL_RETURN_ON_FAILURE(clRetainKernel((*phKernel)->get())); + CL_RETURN_ON_FAILURE(clRetainKernel(NativeHandle)); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp index f4ebb3e61b..44651ebfc7 100644 --- a/source/adapters/opencl/kernel.hpp +++ b/source/adapters/opencl/kernel.hpp @@ -11,6 +11,7 @@ #include "common.hpp" #include "context.hpp" +#include "program.hpp" #include @@ -25,22 +26,14 @@ struct ur_kernel_handle_t_ { ur_context_handle_t Context) : Kernel(Kernel), 
Program(Program), Context(Context) { RefCount = 1; - if (Program) { - urProgramRetain(Program); - } - if (Context) { - urContextRetain(Context); - } + urProgramRetain(Program); + urContextRetain(Context); } ~ur_kernel_handle_t_() { clReleaseKernel(Kernel); - if (Program) { - urProgramRelease(Program); - } - if (Context) { - urContextRelease(Context); - } + urProgramRelease(Program); + urContextRelease(Context); } uint32_t incrementReferenceCount() noexcept { return ++RefCount; } @@ -53,33 +46,27 @@ struct ur_kernel_handle_t_ { ur_program_handle_t Program, ur_context_handle_t Context, ur_kernel_handle_t &Kernel) { + if (!Program || !Context) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } try { - auto URKernel = - std::make_unique(NativeKernel, Program, Context); - if (!Program) { - cl_program CLProgram; - CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, - sizeof(CLProgram), &CLProgram, - nullptr)); - ur_native_handle_t NativeProgram = - reinterpret_cast(CLProgram); - UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( - NativeProgram, nullptr, nullptr, &(URKernel->Program))); - UR_RETURN_ON_FAILURE(urProgramRetain(URKernel->Program)); - } cl_context CLContext; CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); - if (!Context) { - ur_native_handle_t NativeContext = - reinterpret_cast(CLContext); - UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &(URKernel->Context))); - UR_RETURN_ON_FAILURE(urContextRetain(URKernel->Context)); - } else if (Context->get() != CLContext) { + cl_program CLProgram; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, + sizeof(CLProgram), &CLProgram, + nullptr)); + + if (Context->get() != CLContext) { return UR_RESULT_ERROR_INVALID_CONTEXT; } + if (Program->get() != CLProgram) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + auto URKernel = + std::make_unique(NativeKernel, Program, 
Context); Kernel = URKernel.release(); } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 2de2287c91..edeb9b9192 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -364,8 +364,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( return mapCLErrorToUR(RetErr); } -UR_APIEXPORT ur_result_t UR_APICALL -urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { +UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( + ur_mem_handle_t hMem, ur_device_handle_t, ur_native_handle_t *phNativeMem) { return getNativeHandle(hMem->get(), phNativeMem); } @@ -390,7 +390,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( UR_RETURN_ON_FAILURE( ur_mem_handle_t_::makeWithNative(NativeHandle, hContext, *phMem)); if (!pProperties || !pProperties->isNativeHandleOwned) { - CL_RETURN_ON_FAILURE(clRetainMemObject((*phMem)->get())); + CL_RETURN_ON_FAILURE(clRetainMemObject(NativeHandle)); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp index a91001d1a6..df8794c897 100644 --- a/source/adapters/opencl/memory.hpp +++ b/source/adapters/opencl/memory.hpp @@ -10,6 +10,7 @@ #pragma once #include "common.hpp" +#include "context.hpp" #include @@ -22,16 +23,12 @@ struct ur_mem_handle_t_ { ur_mem_handle_t_(native_type Mem, ur_context_handle_t Ctx) : Memory(Mem), Context(Ctx) { RefCount = 1; - if (Context) { - urContextRetain(Context); - } + urContextRetain(Context); } ~ur_mem_handle_t_() { clReleaseMemObject(Memory); - if (Context) { - urContextRelease(Context); - } + urContextRelease(Context); } uint32_t incrementReferenceCount() noexcept { return ++RefCount; } @@ -43,18 +40,18 @@ struct ur_mem_handle_t_ { static ur_result_t makeWithNative(native_type NativeMem, ur_context_handle_t Ctx, ur_mem_handle_t &Mem) { + if (!Ctx) { + return 
UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } try { - auto URMem = std::make_unique(NativeMem, Ctx); - if (!Ctx) { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo( - NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); - ur_native_handle_t NativeContext = - reinterpret_cast(CLContext); - UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &(URMem->Context))); - UR_RETURN_ON_FAILURE(urContextRetain(URMem->Context)); + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo( + NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + + if (Ctx->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; } + auto URMem = std::make_unique(NativeMem, Ctx); Mem = URMem.release(); } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 09cc7742f5..6a9c49eb37 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -71,7 +71,6 @@ struct ur_platform_handle_t_ { } ur_result_t getPlatformVersion(oclv::OpenCLVersion &Version) { - size_t PlatVerSize = 0; CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 11ebaec443..c26f6b0886 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -33,7 +33,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( if (PlatVer >= oclv::V2_1) { /* Make sure all devices support CL 2.1 or newer as well. 
*/ - for (ur_device_handle_t URDev : hContext->Devices) { + for (ur_device_handle_t URDev : hContext->getDevices()) { oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE_AND_SET_NULL(URDev->getDeviceVersion(DevVer), @@ -70,7 +70,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( /* If none of the devices conform with CL 2.1 or newer make sure they all * support the cl_khr_il_program extension. */ - for (ur_device_handle_t URDev : hContext->Devices) { + for (ur_device_handle_t URDev : hContext->getDevices()) { bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( URDev->checkDeviceExtensions({"cl_khr_il_program"}, Supported), @@ -178,7 +178,6 @@ static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_program_info CLPropName = mapURProgramInfoToCL(propName); @@ -375,7 +374,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( UR_RETURN_ON_FAILURE( ur_program_handle_t_::makeWithNative(NativeHandle, hContext, *phProgram)); if (!pProperties || !pProperties->isNativeHandleOwned) { - CL_RETURN_ON_FAILURE(clRetainProgram((*phProgram)->get())); + CL_RETURN_ON_FAILURE(clRetainProgram(NativeHandle)); } return UR_RESULT_SUCCESS; } @@ -393,8 +392,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( return UR_RESULT_ERROR_INVALID_CONTEXT; } - std::vector &DevicesInCtx = Ctx->Devices; - ur_platform_handle_t CurPlatform = Ctx->Devices[0]->Platform; oclv::OpenCLVersion PlatVer; @@ -404,7 +401,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( if (PlatVer < oclv::V2_2) { UseExtensionLookup = true; } else { - for (ur_device_handle_t Dev : DevicesInCtx) { + for (ur_device_handle_t Dev : Ctx->getDevices()) { oclv::OpenCLVersion DevVer; 
UR_RETURN_ON_FAILURE(Dev->getDeviceVersion(DevVer)); diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index 85ddfde6da..1c6bae2e8d 100644 --- a/source/adapters/opencl/program.hpp +++ b/source/adapters/opencl/program.hpp @@ -10,6 +10,7 @@ #pragma once #include "common.hpp" +#include "context.hpp" #include @@ -22,16 +23,12 @@ struct ur_program_handle_t_ { ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx) : Program(Prog), Context(Ctx) { RefCount = 1; - if (Context) { - urContextRetain(Context); - } + urContextRetain(Context); } ~ur_program_handle_t_() { clReleaseProgram(Program); - if (Context) { - urContextRelease(Context); - } + urContextRelease(Context); } uint32_t incrementReferenceCount() noexcept { return ++RefCount; } @@ -43,20 +40,19 @@ struct ur_program_handle_t_ { static ur_result_t makeWithNative(native_type NativeProg, ur_context_handle_t Context, ur_program_handle_t &Program) { + if (!Context) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + if (Context->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } auto URProgram = std::make_unique(NativeProg, Context); - if (!Context) { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, - sizeof(CLContext), &CLContext, - nullptr)); - ur_native_handle_t NativeContext = - reinterpret_cast(CLContext); - UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &(URProgram->Context))); - UR_RETURN_ON_FAILURE(urContextRetain(URProgram->Context)); - } Program = URProgram.release(); } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 605ef2fcf2..9328e32092 100644 --- a/source/adapters/opencl/queue.cpp +++ 
b/source/adapters/opencl/queue.cpp @@ -182,8 +182,7 @@ urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, - ur_device_handle_t hDevice, - [[maybe_unused]] const ur_queue_native_properties_t *pProperties, + ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { cl_command_queue NativeHandle = @@ -192,7 +191,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( UR_RETURN_ON_FAILURE(ur_queue_handle_t_::makeWithNative( NativeHandle, hContext, hDevice, *phQueue)); - CL_RETURN_ON_FAILURE(clRetainCommandQueue(NativeHandle)); + if (!pProperties || !pProperties->isNativeHandleOwned) { + CL_RETURN_ON_FAILURE(clRetainCommandQueue(NativeHandle)); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp index 63d6a6ea50..e44af5f4d9 100644 --- a/source/adapters/opencl/queue.hpp +++ b/source/adapters/opencl/queue.hpp @@ -10,6 +10,8 @@ #pragma once #include "common.hpp" +#include "context.hpp" +#include "device.hpp" #include @@ -24,43 +26,33 @@ struct ur_queue_handle_t_ { ur_device_handle_t Dev) : Queue(Queue), Context(Ctx), Device(Dev) { RefCount = 1; - if (Device) { - urDeviceRetain(Device); - } - if (Context) { - urContextRetain(Context); - } + urDeviceRetain(Device); + urContextRetain(Context); } static ur_result_t makeWithNative(native_type NativeQueue, ur_context_handle_t Context, ur_device_handle_t Device, ur_queue_handle_t &Queue) { + if (!Context || !Device) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } try { - auto URQueue = - std::make_unique(NativeQueue, Context, Device); - if (!Context) { - cl_context CLContext; - CL_RETURN_ON_FAILURE( - clGetCommandQueueInfo(NativeQueue, CL_QUEUE_CONTEXT, - sizeof(CLContext), &CLContext, nullptr)); - ur_native_handle_t NativeContext = - 
reinterpret_cast(CLContext); - UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( - NativeContext, 0, nullptr, nullptr, &(URQueue->Context))); - UR_RETURN_ON_FAILURE(urContextRetain(URQueue->Context)); + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(NativeQueue, CL_QUEUE_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + cl_device_id CLDevice; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + NativeQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); + if (Context->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; } - if (!Device) { - cl_device_id CLDevice; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(NativeQueue, CL_QUEUE_DEVICE, - sizeof(CLDevice), &CLDevice, - nullptr)); - ur_native_handle_t NativeDevice = - reinterpret_cast(CLDevice); - UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( - NativeDevice, nullptr, nullptr, &(URQueue->Device))); - UR_RETURN_ON_FAILURE(urDeviceRetain(URQueue->Device)); + if (Device->get() != CLDevice) { + return UR_RESULT_ERROR_INVALID_DEVICE; } + auto URQueue = + std::make_unique(NativeQueue, Context, Device); Queue = URQueue.release(); } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -72,12 +64,8 @@ struct ur_queue_handle_t_ { ~ur_queue_handle_t_() { clReleaseCommandQueue(Queue); - if (Device) { - urDeviceRelease(Device); - } - if (Context) { - urContextRelease(Context); - } + urDeviceRelease(Device); + urContextRelease(Context); } uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/source/adapters/opencl/sampler.hpp b/source/adapters/opencl/sampler.hpp index c3f01e124b..238ee1cecc 100644 --- a/source/adapters/opencl/sampler.hpp +++ b/source/adapters/opencl/sampler.hpp @@ -22,16 +22,12 @@ struct ur_sampler_handle_t_ { ur_sampler_handle_t_(native_type Sampler, ur_context_handle_t Ctx) : Sampler(Sampler), Context(Ctx) { RefCount = 1; - if (Context) { - urContextRetain(Context); - } + urContextRetain(Context); } 
~ur_sampler_handle_t_() { clReleaseSampler(Sampler); - if (Context) { - urContextRelease(Context); - } + urContextRelease(Context); } uint32_t incrementReferenceCount() noexcept { return ++RefCount; } From 456e1c6225196da6d79c6bae62241a35dd7230e1 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Thu, 1 Feb 2024 15:41:45 +0000 Subject: [PATCH 18/19] sync retain/release cl handles with ur handles --- source/adapters/opencl/context.cpp | 3 +++ source/adapters/opencl/device.cpp | 10 +++++++--- source/adapters/opencl/device.hpp | 2 +- source/adapters/opencl/event.cpp | 3 +++ source/adapters/opencl/kernel.cpp | 3 +++ source/adapters/opencl/memory.cpp | 3 +++ source/adapters/opencl/program.cpp | 3 +++ source/adapters/opencl/queue.cpp | 3 +++ source/adapters/opencl/sampler.cpp | 3 +++ 9 files changed, 29 insertions(+), 4 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 1382ec3c47..75e193b6ed 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -79,12 +79,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { if (hContext->decrementReferenceCount() == 0) { delete hContext; + } else { + CL_RETURN_ON_FAILURE(clReleaseContext(hContext->get())); } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urContextRetain(ur_context_handle_t hContext) { + CL_RETURN_ON_FAILURE(clRetainContext(hContext->get())); hContext->incrementReferenceCount(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 8d323c841a..62ae68b7cc 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1009,6 +1009,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( // Root devices ref count are unchanged through out the program lifetime. 
UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { if (hDevice->ParentDevice) { + CL_RETURN_ON_FAILURE(clRetainDevice(hDevice->get())); hDevice->incrementReferenceCount(); } @@ -1018,10 +1019,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { // Root devices ref count are unchanged through out the program lifetime. UR_APIEXPORT ur_result_t UR_APICALL urDeviceRelease(ur_device_handle_t hDevice) { - if (hDevice->ParentDevice && hDevice->decrementReferenceCount() == 0) { - delete hDevice; + if (hDevice->ParentDevice) { + if (hDevice->decrementReferenceCount() == 0) { + delete hDevice; + } else { + CL_RETURN_ON_FAILURE(clReleaseDevice(hDevice->get())); + } } - return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 3bea8b6b94..22f554ce73 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -32,7 +32,7 @@ struct ur_device_handle_t_ { } } - ~ur_device_handle_t_() { clReleaseDevice(Device); } + ~ur_device_handle_t_() {} uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 55510d4920..018574c1ea 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -139,11 +139,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { if (hEvent->decrementReferenceCount() == 0) { delete hEvent; + } else { + CL_RETURN_ON_FAILURE(clReleaseEvent(hEvent->get())); } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { + CL_RETURN_ON_FAILURE(clRetainEvent(hEvent->get())); hEvent->incrementReferenceCount(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 899af56fe7..c1e283029c 100644 --- 
a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -272,6 +272,7 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { + CL_RETURN_ON_FAILURE(clRetainKernel(hKernel->get())); hKernel->incrementReferenceCount(); return UR_RESULT_SUCCESS; } @@ -280,6 +281,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(ur_kernel_handle_t hKernel) { if (hKernel->decrementReferenceCount() == 0) { delete hKernel; + } else { + CL_RETURN_ON_FAILURE(clReleaseKernel(hKernel->get())); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index edeb9b9192..219c056ef3 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -448,6 +448,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, } UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { + CL_RETURN_ON_FAILURE(clRetainMemObject(hMem->get())); hMem->incrementReferenceCount(); return UR_RESULT_SUCCESS; } @@ -455,6 +456,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { if (hMem->decrementReferenceCount() == 0) { delete hMem; + } else { + CL_RETURN_ON_FAILURE(clReleaseMemObject(hMem->get())); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index c26f6b0886..799edf8d9c 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -346,6 +346,7 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain(ur_program_handle_t hProgram) { + CL_RETURN_ON_FAILURE(clRetainProgram(hProgram->get())); hProgram->incrementReferenceCount(); return UR_RESULT_SUCCESS; } @@ -354,6 +355,8 @@ UR_APIEXPORT 
ur_result_t UR_APICALL urProgramRelease(ur_program_handle_t hProgram) { if (hProgram->decrementReferenceCount() == 0) { delete hProgram; + } else { + CL_RETURN_ON_FAILURE(clReleaseProgram(hProgram->get())); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 9328e32092..2cbeb6f738 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -211,6 +211,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { } UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { + CL_RETURN_ON_FAILURE(clRetainCommandQueue(hQueue->get())); hQueue->incrementReferenceCount(); return UR_RESULT_SUCCESS; } @@ -218,6 +219,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { if (hQueue->decrementReferenceCount() == 0) { delete hQueue; + } else { + CL_RETURN_ON_FAILURE(clReleaseCommandQueue(hQueue->get())); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 0f88bf3a29..d52c0d6f42 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -197,6 +197,7 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { + CL_RETURN_ON_FAILURE(clRetainSampler(hSampler->get())); hSampler->incrementReferenceCount(); return UR_RESULT_SUCCESS; } @@ -205,6 +206,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { if (hSampler->decrementReferenceCount() == 0) { delete hSampler; + } else { + CL_RETURN_ON_FAILURE(clReleaseSampler(hSampler->get())); } return UR_RESULT_SUCCESS; } From 9bfbf6e654ed3813926e041f9470689b46569907 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Thu, 8 Feb 2024 14:38:21 +0000 Subject: [PATCH 19/19]
Refactor ext-function caching --- source/adapters/opencl/adapter.cpp | 8 +- source/adapters/opencl/command_buffer.cpp | 148 ++++++-------- source/adapters/opencl/command_buffer.hpp | 4 + source/adapters/opencl/common.hpp | 104 ---------- source/adapters/opencl/context.hpp | 2 + source/adapters/opencl/device.hpp | 1 - source/adapters/opencl/enqueue.cpp | 132 ++++++------ source/adapters/opencl/kernel.cpp | 59 +++--- source/adapters/opencl/kernel.hpp | 2 + source/adapters/opencl/memory.cpp | 21 +- source/adapters/opencl/platform.hpp | 94 +++++++-- source/adapters/opencl/program.cpp | 37 ++-- source/adapters/opencl/queue.hpp | 2 + source/adapters/opencl/usm.cpp | 237 +++++++++++----------- 14 files changed, 386 insertions(+), 465 deletions(-) diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 8ae1e77755..fbbdd84e59 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -22,9 +22,7 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { if (NumEntries > 0 && phAdapters) { std::lock_guard Lock{adapter.Mutex}; - if (adapter.RefCount++ == 0) { - cl_ext::ExtFuncPtrCache = std::make_unique(); - } + adapter.RefCount++; *phAdapters = &adapter; } @@ -43,9 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { std::lock_guard Lock{adapter.Mutex}; - if (--adapter.RefCount == 0) { - cl_ext::ExtFuncPtrCache.reset(); - } + --adapter.RefCount; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 1c57246eca..815dfc9c06 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -14,6 +14,7 @@ #include "event.hpp" #include "kernel.hpp" #include "memory.hpp" +#include "platform.hpp" #include "queue.hpp" UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferCreateExp( @@ -24,15 +25,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_queue_handle_t Queue = nullptr; UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue)); - cl_context CLContext = hContext->get(); - cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache, - cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR); + ur_platform_handle_t Platform = hDevice->Platform; + cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = + Platform->ExtFuncPtr->clCreateCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCreateCommandBufferKHR, + cl_ext::CreateCommandBufferName, + "cl_khr_command_buffer")); - if (!clCreateCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_int Res = 0; cl_command_queue CLQueue = Queue->get(); auto CLCommandBuffer = clCreateCommandBufferKHR(1, &CLQueue, nullptr, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); @@ -55,14 +55,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue)); - cl_context CLContext = hCommandBuffer->hContext->get(); - cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, - cl_ext::RetainCommandBufferName, &clRetainCommandBuffer); - - if (!clRetainCommandBuffer || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = + Platform->ExtFuncPtr->clRetainCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clRetainCommandBuffer, + cl_ext::RetainCommandBufferName, + 
"cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clRetainCommandBuffer(hCommandBuffer->CLCommandBuffer)); return UR_RESULT_SUCCESS; @@ -72,15 +70,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue)); - cl_context CLContext = hCommandBuffer->hContext->get(); - cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, - cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); - - if (!clReleaseCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = + Platform->ExtFuncPtr->clReleaseCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clReleaseCommandBufferKHR, + cl_ext::ReleaseCommandBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE( clReleaseCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); @@ -89,15 +84,12 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - cl_context CLContext = hCommandBuffer->hContext->get(); - cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache, - cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR); - - if (!clFinalizeCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = + Platform->ExtFuncPtr->clFinalizeCommandBufferKHRCache; + 
UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clFinalizeCommandBufferKHR, + cl_ext::FinalizeCommandBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE( clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); @@ -113,15 +105,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_exp_command_buffer_command_handle_t *) { - cl_context CLContext = hCommandBuffer->hContext->get(); - cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache, - cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR); - - if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = + Platform->ExtFuncPtr->clCommandNDRangeKernelKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandNDRangeKernelKHR, + cl_ext::CommandNRRangeKernelName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hKernel->get(), @@ -160,14 +149,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_context CLContext = hCommandBuffer->hContext->get(); - cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, - cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR); - - if (!clCommandCopyBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = 
+ Platform->ExtFuncPtr->clCommandCopyBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandCopyBufferKHR, + cl_ext::CommandCopyBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(), @@ -195,15 +182,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; size_t OpenCLRegion[3]{region.width, region.height, region.depth}; - cl_context CLContext = hCommandBuffer->hContext->get(); - cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache, - cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR); - - if (!clCommandCopyBufferRectKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = + Platform->ExtFuncPtr->clCommandCopyBufferRectKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandCopyBufferRectKHR, + cl_ext::CommandCopyBufferRectName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( hCommandBuffer->CLCommandBuffer, nullptr, hSrcMem->get(), hDstMem->get(), @@ -284,14 +268,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_context CLContext = hCommandBuffer->hContext->get(); - cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, - cl_ext::CommandFillBufferName, &clCommandFillBufferKHR); - - if (!clCommandFillBufferKHR || Res != CL_SUCCESS) - return 
UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = + Platform->ExtFuncPtr->clCommandFillBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clCommandFillBufferKHR, + cl_ext::CommandFillBufferName, + "cl_khr_command_buffer")); CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( hCommandBuffer->CLCommandBuffer, nullptr, hBuffer->get(), pPattern, @@ -340,15 +322,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = hCommandBuffer->hContext->get(); - cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache, - cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR); - - if (!clEnqueueCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = + Platform->ExtFuncPtr->clEnqueueCommandBufferKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clEnqueueCommandBufferKHR, + cl_ext::EnqueueCommandBufferName, + "cl_khr_command_buffer")); const uint32_t NumberOfQueues = 1; cl_event Event; @@ -396,15 +375,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetCommandBufferInfoKHRCache, - cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR); - - if (!clGetCommandBufferInfoKHR || Res != 
CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_platform_handle_t Platform = hCommandBuffer->getPlatform(); + cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = + Platform->ExtFuncPtr->clGetCommandBufferInfoKHRCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clGetCommandBufferInfoKHR, + cl_ext::GetCommandBufferInfoName, + "cl_khr_command_buffer")); if (propName != UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT) { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index d80f29594b..17e4e1f7b6 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -11,6 +11,8 @@ #include #include +#include "context.hpp" + struct ur_exp_command_buffer_handle_t_ { ur_queue_handle_t hInternalQueue; ur_context_handle_t hContext; @@ -21,4 +23,6 @@ struct ur_exp_command_buffer_handle_t_ { cl_command_buffer_khr CLCommandBuffer) : hInternalQueue(hQueue), hContext(hContext), CLCommandBuffer(CLCommandBuffer) {} + + ur_platform_handle_t getPlatform() { return hContext->Devices[0]->Platform; } }; diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0667cd3d17..2fd00afd36 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -305,110 +305,6 @@ using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); -template struct FuncPtrCache { - std::map Map; - std::mutex Mutex; -}; - -// FIXME: There's currently no mechanism for cleaning up this cache, meaning -// that it is invalidated whenever a context is destroyed. This could lead to -// reusing an invalid function pointer if another context happens to have the -// same native handle. 
-struct ExtFuncPtrCacheT { - FuncPtrCache clHostMemAllocINTELCache; - FuncPtrCache clDeviceMemAllocINTELCache; - FuncPtrCache clSharedMemAllocINTELCache; - FuncPtrCache clGetDeviceFunctionPointerCache; - FuncPtrCache - clCreateBufferWithPropertiesINTELCache; - FuncPtrCache clMemBlockingFreeINTELCache; - FuncPtrCache - clSetKernelArgMemPointerINTELCache; - FuncPtrCache clEnqueueMemFillINTELCache; - FuncPtrCache clEnqueueMemcpyINTELCache; - FuncPtrCache clGetMemAllocInfoINTELCache; - FuncPtrCache - clEnqueueWriteGlobalVariableCache; - FuncPtrCache clEnqueueReadGlobalVariableCache; - FuncPtrCache clEnqueueReadHostPipeINTELCache; - FuncPtrCache clEnqueueWriteHostPipeINTELCache; - FuncPtrCache - clSetProgramSpecializationConstantCache; - FuncPtrCache clCreateCommandBufferKHRCache; - FuncPtrCache clRetainCommandBufferKHRCache; - FuncPtrCache clReleaseCommandBufferKHRCache; - FuncPtrCache clFinalizeCommandBufferKHRCache; - FuncPtrCache clCommandNDRangeKernelKHRCache; - FuncPtrCache clCommandCopyBufferKHRCache; - FuncPtrCache clCommandCopyBufferRectKHRCache; - FuncPtrCache clCommandFillBufferKHRCache; - FuncPtrCache clEnqueueCommandBufferKHRCache; - FuncPtrCache clGetCommandBufferInfoKHRCache; -}; -// A raw pointer is used here since the lifetime of this map has to be tied to -// piTeardown to avoid issues with static destruction order (a user application -// might have static objects that indirectly access this cache in their -// destructor). -inline std::unique_ptr ExtFuncPtrCache; - -// USM helper function to get an extension function pointer -template -static ur_result_t getExtFuncFromContext(cl_context Context, - FuncPtrCache &FPtrCache, - const char *FuncName, T *Fptr) { - // TODO - // Potentially redo caching as UR interface changes. 
- // if cached, return cached FuncPtr - std::lock_guard CacheLock{FPtrCache.Mutex}; - std::map &FPtrMap = FPtrCache.Map; - auto It = FPtrMap.find(Context); - if (It != FPtrMap.end()) { - auto F = It->second; - // if cached that extension is not available return nullptr and - // UR_RESULT_ERROR_INVALID_VALUE - *Fptr = F; - return F ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_INVALID_VALUE; - } - - cl_uint DeviceCount; - cl_int RetErr = clGetContextInfo(Context, CL_CONTEXT_NUM_DEVICES, - sizeof(cl_uint), &DeviceCount, nullptr); - - if (RetErr != CL_SUCCESS || DeviceCount < 1) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - std::vector DevicesInCtx(DeviceCount); - RetErr = clGetContextInfo(Context, CL_CONTEXT_DEVICES, - DeviceCount * sizeof(cl_device_id), - DevicesInCtx.data(), nullptr); - - if (RetErr != CL_SUCCESS) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - cl_platform_id CurPlatform; - RetErr = clGetDeviceInfo(DevicesInCtx[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &CurPlatform, nullptr); - - if (RetErr != CL_SUCCESS) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - - T FuncPtr = reinterpret_cast( - clGetExtensionFunctionAddressForPlatform(CurPlatform, FuncName)); - - if (!FuncPtr) { - // Cache that the extension is not available - FPtrMap[Context] = nullptr; - return UR_RESULT_ERROR_INVALID_VALUE; - } - - *Fptr = FuncPtr; - FPtrMap[Context] = FuncPtr; - - return UR_RESULT_SUCCESS; -} } // namespace cl_ext ur_result_t mapCLErrorToUR(cl_int Result); diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 555636b1b8..cc537b2c8f 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -81,5 +81,7 @@ struct ur_context_handle_t_ { native_type get() { return Context; } + ur_platform_handle_t getPlatform() { return Devices[0]->Platform; } + const std::vector &getDevices() { return Devices; } }; diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 
22f554ce73..b1cd437e8f 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -10,7 +10,6 @@ #pragma once #include "common.hpp" -#include "platform.hpp" struct ur_device_handle_t_ { using native_type = cl_device_id; diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 7ffaefd733..1f9320cace 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -13,6 +13,7 @@ #include "event.hpp" #include "kernel.hpp" #include "memory.hpp" +#include "platform.hpp" #include "program.hpp" #include "queue.hpp" @@ -518,22 +519,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = hQueue->Context->get(); + ur_platform_handle_t Platform = hQueue->getPlatform(); - cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, - cl_ext::EnqueueWriteGlobalVariableName, &F); + cl_ext::clEnqueueWriteGlobalVariable_fn clEnqueueWriteGlobalVariable = + Platform->ExtFuncPtr->clEnqueueWriteGlobalVariableCache; + UR_RETURN_ON_FAILURE( + Platform->getExtFunc(&clEnqueueWriteGlobalVariable, + cl_ext::EnqueueWriteGlobalVariableName, "")); - if (!F || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { CLWaitEvents[i] = phEventWaitList[i]->get(); } - Res = F(hQueue->get(), hProgram->get(), name, blockingWrite, count, offset, - pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event); + cl_int Res = clEnqueueWriteGlobalVariable( + hQueue->get(), hProgram->get(), name, blockingWrite, count, offset, pSrc, + numEventsInWaitList, CLWaitEvents.data(), &Event); if (phEvent) { try { auto UREvent = @@ -554,22 +555,21 @@ UR_APIEXPORT 
ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = hQueue->Context->get(); + ur_platform_handle_t Platform = hQueue->getPlatform(); - cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, - cl_ext::EnqueueReadGlobalVariableName, &F); + cl_ext::clEnqueueReadGlobalVariable_fn clEnqueueReadGlobalVariable = + Platform->ExtFuncPtr->clEnqueueReadGlobalVariableCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &clEnqueueReadGlobalVariable, cl_ext::EnqueueReadGlobalVariableName, "")); - if (!F || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { CLWaitEvents[i] = phEventWaitList[i]->get(); } - Res = F(hQueue->get(), hProgram->get(), name, blockingRead, count, offset, - pDst, numEventsInWaitList, CLWaitEvents.data(), &Event); + cl_int Res = clEnqueueReadGlobalVariable( + hQueue->get(), hProgram->get(), name, blockingRead, count, offset, pDst, + numEventsInWaitList, CLWaitEvents.data(), &Event); if (phEvent) { try { auto UREvent = @@ -590,33 +590,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = hQueue->Context->get(); + ur_platform_handle_t Platform = hQueue->getPlatform(); - cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, - cl_ext::EnqueueReadHostPipeName, &FuncPtr); + cl_ext::clEnqueueReadHostPipeINTEL_fn clEnqueueReadHostPipe = + Platform->ExtFuncPtr->clEnqueueReadHostPipeINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + 
&clEnqueueReadHostPipe, cl_ext::EnqueueReadHostPipeName, + "cl_intel_program_scope_host_pipe")); - if (FuncPtr) { - cl_event Event; - std::vector CLWaitEvents(numEventsInWaitList); - for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); - } - RetVal = mapCLErrorToUR(FuncPtr(hQueue->get(), hProgram->get(), pipe_symbol, - blocking, pDst, size, numEventsInWaitList, - CLWaitEvents.data(), &Event)); - if (phEvent) { - try { - auto UREvent = std::make_unique( - Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + ur_result_t RetVal = mapCLErrorToUR(clEnqueueReadHostPipe( + hQueue->get(), hProgram->get(), pipe_symbol, blocking, pDst, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; } } @@ -629,33 +627,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = hQueue->Context->get(); + ur_platform_handle_t Platform = hQueue->getPlatform(); - cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, - cl_ext::EnqueueWriteHostPipeName, &FuncPtr); + cl_ext::clEnqueueWriteHostPipeINTEL_fn clEnqueueWriteHostPipe = + Platform->ExtFuncPtr->clEnqueueWriteHostPipeINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &clEnqueueWriteHostPipe, cl_ext::EnqueueWriteHostPipeName, + "cl_intel_program_scope_host_pipe")); - if (FuncPtr) { - cl_event Event; - std::vector CLWaitEvents(numEventsInWaitList); - for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); - } - RetVal = mapCLErrorToUR(FuncPtr(hQueue->get(), hProgram->get(), pipe_symbol, - blocking, pSrc, size, numEventsInWaitList, - CLWaitEvents.data(), &Event)); - if (phEvent) { - try { - auto UREvent = std::make_unique( - Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) 
{ - return UR_RESULT_ERROR_UNKNOWN; - } + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + ur_result_t RetVal = mapCLErrorToUR(clEnqueueWriteHostPipe( + hQueue->get(), hProgram->get(), pipe_symbol, blocking, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; } } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index c1e283029c..194824316f 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -11,6 +11,7 @@ #include "common.hpp" #include "device.hpp" #include "memory.hpp" +#include "platform.hpp" #include "program.hpp" #include "sampler.hpp" @@ -294,42 +295,38 @@ urKernelRelease(ur_kernel_handle_t hKernel) { static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_bool TrueVal = CL_TRUE; - clHostMemAllocINTEL_fn HFunc = nullptr; - clSharedMemAllocINTEL_fn SFunc = nullptr; - clDeviceMemAllocINTEL_fn DFunc = nullptr; - cl_context CLContext; - + ur_platform_handle_t Platform = hKernel->getPlatform(); /* We test that each alloc type is supported before we actually try to set * KernelExecInfo. 
*/ - CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), CL_KERNEL_CONTEXT, - sizeof(cl_context), &CLContext, - nullptr)); - - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::HostMemAllocName, &HFunc)); + clHostMemAllocINTEL_fn clHostMemAlloc = + Platform->ExtFuncPtr->clHostMemAllocINTELCache; + ur_result_t Res = + Platform->getExtFunc(&clHostMemAlloc, cl_ext::HostMemAllocName, + "cl_intel_unified_shared_memory"); - if (HFunc) { + if (Res == UR_RESULT_SUCCESS) { CL_RETURN_ON_FAILURE(clSetKernelExecInfo( hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &TrueVal)); } - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, - cl_ext::DeviceMemAllocName, &DFunc)); + clDeviceMemAllocINTEL_fn clDeviceMemAlloc = + Platform->ExtFuncPtr->clDeviceMemAllocINTELCache; + Res = Platform->getExtFunc(&clDeviceMemAlloc, cl_ext::DeviceMemAllocName, + "cl_intel_unified_shared_memory"); - if (DFunc) { + if (Res == UR_RESULT_SUCCESS) { CL_RETURN_ON_FAILURE(clSetKernelExecInfo( hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &TrueVal)); } - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, - cl_ext::SharedMemAllocName, &SFunc)); + clSharedMemAllocINTEL_fn clSharedMemAlloc = + Platform->ExtFuncPtr->clSharedMemAllocINTELCache; + Res = Platform->getExtFunc(&clSharedMemAlloc, cl_ext::SharedMemAllocName, + "cl_intel_unified_shared_memory"); - if (SFunc) { + if (Res == UR_RESULT_SUCCESS) { CL_RETURN_ON_FAILURE(clSetKernelExecInfo( hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &TrueVal)); @@ -374,24 +371,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( sizeof(cl_context), &CLContext, nullptr)); - clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; - 
UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, - cl_ext::ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, - cl_ext::SetKernelArgMemPointerName, &FuncPtr)); + ur_platform_handle_t Platform = hKernel->getPlatform(); - if (FuncPtr) { + clSetKernelArgMemPointerINTEL_fn clSetKernelArgMemPointer = + Platform->ExtFuncPtr->clSetKernelArgMemPointerINTELCache; + ur_result_t Res = Platform->getExtFunc(&clSetKernelArgMemPointer, + cl_ext::SetKernelArgMemPointerName, + "cl_intel_unified_shared_memory"); + + if (Res == UR_RESULT_SUCCESS) { /* OpenCL passes pointers by value not by reference. This means we need to * deref the arg to get the pointer value */ auto PtrToPtr = reinterpret_cast(pArgValue); auto DerefPtr = reinterpret_cast(*PtrToPtr); - CL_RETURN_ON_FAILURE( - FuncPtr(hKernel->get(), cl_adapter::cast(argIndex), DerefPtr)); + CL_RETURN_ON_FAILURE(clSetKernelArgMemPointer( + hKernel->get(), cl_adapter::cast(argIndex), DerefPtr)); } return UR_RESULT_SUCCESS; } + UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp index 44651ebfc7..50f48b41c8 100644 --- a/source/adapters/opencl/kernel.hpp +++ b/source/adapters/opencl/kernel.hpp @@ -78,4 +78,6 @@ struct ur_kernel_handle_t_ { } native_type get() { return Kernel; } + + ur_platform_handle_t getPlatform() { return Context->Devices[0]->Platform; } }; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 219c056ef3..c16f2d6b58 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -11,6 +11,7 @@ #include "memory.hpp" #include "common.hpp" #include "context.hpp" +#include "platform.hpp" cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_format CLImageFormat; @@ -230,15 +231,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( if 
(pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported - clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; - cl_context CLContext = hContext->get(); + // First we need to look up the function pointer - RetErr = - cl_ext::getExtFuncFromContext( - CLContext, - cl_ext::ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, - cl_ext::CreateBufferWithPropertiesName, &FuncPtr); - if (FuncPtr) { + cl_context CLContext = hContext->get(); + ur_platform_handle_t Platform = hContext->getPlatform(); + clCreateBufferWithPropertiesINTEL_fn clCreateBufferWithProperties = + Platform->ExtFuncPtr->clCreateBufferWithPropertiesINTELCache; + ur_result_t Res = Platform->getExtFunc( + &clCreateBufferWithProperties, cl_ext::CreateBufferWithPropertiesName, + "cl_intel_create_buffer_with_properties"); + + if (Res == UR_RESULT_SUCCESS) { std::vector PropertiesIntel; auto Prop = static_cast(pProperties->pNext); while (Prop) { @@ -263,7 +266,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( PropertiesIntel.push_back(0); try { - cl_mem Buffer = FuncPtr( + cl_mem Buffer = clCreateBufferWithProperties( CLContext, PropertiesIntel.data(), static_cast(flags), size, pProperties->pHost, cl_adapter::cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 6a9c49eb37..f1e1fe0c9e 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -14,32 +14,23 @@ #include +using namespace cl_ext; + struct ur_platform_handle_t_ { using native_type = cl_platform_id; native_type Platform = nullptr; std::vector> Devices; - ur_platform_handle_t_(native_type Plat) : Platform(Plat) {} + ur_platform_handle_t_(native_type Plat) : Platform(Plat) { + ExtFuncPtr = std::make_unique(); + } ~ur_platform_handle_t_() { for (auto &Dev : Devices) { Dev.reset(); } Devices.clear(); - } - - template - ur_result_t getExtFunc(T CachedExtFunc, const 
char *FuncName, T *Fptr) { - if (!CachedExtFunc) { - // TODO: check that the function is available - CachedExtFunc = reinterpret_cast( - clGetExtensionFunctionAddressForPlatform(Platform, FuncName)); - if (!CachedExtFunc) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } - *Fptr = CachedExtFunc; - return UR_RESULT_SUCCESS; + ExtFuncPtr.reset(); } native_type get() { return Platform; } @@ -86,4 +77,77 @@ struct ur_platform_handle_t_ { return UR_RESULT_SUCCESS; } + + ur_result_t checkPlatformExtensions(const std::vector &Exts, + bool &Supported) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_EXTENSIONS, 0, + nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + + CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_EXTENSIONS, + ExtSize, ExtStr.data(), nullptr)); + + Supported = true; + for (const std::string &Ext : Exts) { + if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + break; + } + } + + return UR_RESULT_SUCCESS; + } + + struct ExtFuncPtrT { + clHostMemAllocINTEL_fn clHostMemAllocINTELCache = nullptr; + clDeviceMemAllocINTEL_fn clDeviceMemAllocINTELCache = nullptr; + clSharedMemAllocINTEL_fn clSharedMemAllocINTELCache = nullptr; + clGetDeviceFunctionPointer_fn clGetDeviceFunctionPointerCache = nullptr; + clCreateBufferWithPropertiesINTEL_fn + clCreateBufferWithPropertiesINTELCache = nullptr; + clMemBlockingFreeINTEL_fn clMemBlockingFreeINTELCache = nullptr; + clSetKernelArgMemPointerINTEL_fn clSetKernelArgMemPointerINTELCache = + nullptr; + clEnqueueMemFillINTEL_fn clEnqueueMemFillINTELCache = nullptr; + clEnqueueMemcpyINTEL_fn clEnqueueMemcpyINTELCache = nullptr; + clGetMemAllocInfoINTEL_fn clGetMemAllocInfoINTELCache = nullptr; + clEnqueueWriteGlobalVariable_fn clEnqueueWriteGlobalVariableCache = nullptr; + clEnqueueReadGlobalVariable_fn clEnqueueReadGlobalVariableCache = nullptr; + clEnqueueReadHostPipeINTEL_fn clEnqueueReadHostPipeINTELCache = nullptr; + clEnqueueWriteHostPipeINTEL_fn 
clEnqueueWriteHostPipeINTELCache = nullptr; + clSetProgramSpecializationConstant_fn + clSetProgramSpecializationConstantCache = nullptr; + clCreateCommandBufferKHR_fn clCreateCommandBufferKHRCache = nullptr; + clRetainCommandBufferKHR_fn clRetainCommandBufferKHRCache = nullptr; + clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHRCache = nullptr; + clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHRCache = nullptr; + clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHRCache = nullptr; + clCommandCopyBufferKHR_fn clCommandCopyBufferKHRCache = nullptr; + clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHRCache = nullptr; + clCommandFillBufferKHR_fn clCommandFillBufferKHRCache = nullptr; + clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHRCache = nullptr; + clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHRCache = nullptr; + }; + + std::unique_ptr ExtFuncPtr; + template + ur_result_t getExtFunc(T *CachedExtFunc, const char *FuncName, + const char *Extension) { + if (!(*CachedExtFunc)) { + // Check that the function ext is supported by the platform. 
+ bool Supported = false; + UR_RETURN_ON_FAILURE(checkPlatformExtensions({Extension}, Supported)); + if (!Supported) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + *CachedExtFunc = reinterpret_cast( + clGetExtensionFunctionAddressForPlatform(Platform, FuncName)); + if (!(*CachedExtFunc)) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + } + return UR_RESULT_SUCCESS; + } }; diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 799edf8d9c..21b00b4add 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -424,17 +424,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( } } else { cl_ext::clSetProgramSpecializationConstant_fn - SetProgramSpecializationConstant = nullptr; - const ur_result_t URResult = cl_ext::getExtFuncFromContext< - decltype(SetProgramSpecializationConstant)>( - Ctx->get(), - cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, - cl_ext::SetProgramSpecializationConstantName, - &SetProgramSpecializationConstant); - - if (URResult != UR_RESULT_SUCCESS) { - return URResult; - } + SetProgramSpecializationConstant = + CurPlatform->ExtFuncPtr->clSetProgramSpecializationConstantCache; + UR_RETURN_ON_FAILURE(CurPlatform->getExtFunc( + &SetProgramSpecializationConstant, + cl_ext::SetProgramSpecializationConstantName, "")); for (uint32_t i = 0; i < count; ++i) { CL_RETURN_ON_FAILURE(SetProgramSpecializationConstant( @@ -475,16 +469,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ur_device_handle_t hDevice, ur_program_handle_t hProgram, const char *pFunctionName, void **ppFunctionPointer) { - cl_context CLContext = hProgram->Context->get(); - - cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; - - UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, - cl_ext::GetDeviceFunctionPointerName, &FuncT)); + ur_platform_handle_t Platform = 
hDevice->Platform; + cl_ext::clGetDeviceFunctionPointer_fn clGetDeviceFunctionPointer = + Platform->ExtFuncPtr->clGetDeviceFunctionPointerCache; + ur_result_t Res = Platform->getExtFunc( + &clGetDeviceFunctionPointer, cl_ext::GetDeviceFunctionPointerName, ""); - if (!FuncT) { + if (Res != UR_RESULT_SUCCESS) { return UR_RESULT_ERROR_INVALID_FUNCTION_NAME; } @@ -511,9 +502,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( return UR_RESULT_ERROR_INVALID_KERNEL_NAME; } - const cl_int CLResult = - FuncT(hDevice->get(), hProgram->get(), pFunctionName, - reinterpret_cast(ppFunctionPointer)); + const cl_int CLResult = clGetDeviceFunctionPointer( + hDevice->get(), hProgram->get(), pFunctionName, + reinterpret_cast(ppFunctionPointer)); // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address // cannot be found but the kernel exists. As the kernel does exist, return // that the function name is invalid. diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp index e44af5f4d9..e5723f3204 100644 --- a/source/adapters/opencl/queue.hpp +++ b/source/adapters/opencl/queue.hpp @@ -75,4 +75,6 @@ struct ur_queue_handle_t_ { uint32_t getReferenceCount() const noexcept { return RefCount; } native_type get() { return Queue; } + + ur_platform_handle_t getPlatform() { return Device->Platform; } }; diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 6e1917a034..57920e4674 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -12,6 +12,7 @@ #include "context.hpp" #include "device.hpp" #include "event.hpp" +#include "platform.hpp" #include "queue.hpp" inline cl_mem_alloc_flags_intel @@ -97,24 +98,22 @@ urUSMHostAlloc(ur_context_handle_t Context, const ur_usm_desc_t *pUSMDesc, } // First we need to look up the function pointer - clHostMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = Context->get(); - if (auto UrResult = cl_ext::getExtFuncFromContext( - 
CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::HostMemAllocName, &FuncPtr)) { - return UrResult; - } - - if (FuncPtr) { - cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, - AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, &ClResult); - if (ClResult == CL_INVALID_BUFFER_SIZE) { - return UR_RESULT_ERROR_INVALID_USM_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); + ur_platform_handle_t Platform = Context->getPlatform(); + clHostMemAllocINTEL_fn clHostMemAlloc = + Platform->ExtFuncPtr->clHostMemAllocINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clHostMemAlloc, + cl_ext::HostMemAllocName, + "cl_intel_unified_shared_memory")); + + cl_int ClResult = CL_SUCCESS; + Ptr = clHostMemAlloc( + CLContext, AllocProperties.empty() ? nullptr : AllocProperties.data(), + size, Alignment, &ClResult); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; } + CL_RETURN_ON_FAILURE(ClResult); *ppMem = Ptr; @@ -140,24 +139,23 @@ urUSMDeviceAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, } // First we need to look up the function pointer - clDeviceMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = Context->get(); - if (auto UrResult = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, - cl_ext::DeviceMemAllocName, &FuncPtr)) { - return UrResult; - } - - if (FuncPtr) { - cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, hDevice->get(), - AllocProperties.empty() ? 
nullptr : AllocProperties.data(), - size, Alignment, &ClResult); - if (ClResult == CL_INVALID_BUFFER_SIZE) { - return UR_RESULT_ERROR_INVALID_USM_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); + ur_platform_handle_t Platform = hDevice->Platform; + clDeviceMemAllocINTEL_fn clDeviceMemAlloc = + Platform->ExtFuncPtr->clDeviceMemAllocINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clDeviceMemAlloc, + cl_ext::DeviceMemAllocName, + "cl_intel_unified_shared_memory")); + + cl_int ClResult = CL_SUCCESS; + Ptr = clDeviceMemAlloc(CLContext, hDevice->get(), + AllocProperties.empty() ? nullptr + : AllocProperties.data(), + size, Alignment, &ClResult); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; } + CL_RETURN_ON_FAILURE(ClResult); *ppMem = Ptr; @@ -183,24 +181,23 @@ urUSMSharedAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, } // First we need to look up the function pointer - clSharedMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = Context->get(); - if (auto UrResult = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, - cl_ext::SharedMemAllocName, &FuncPtr)) { - return UrResult; - } - - if (FuncPtr) { - cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, hDevice->get(), - AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, cl_adapter::cast(&ClResult)); - if (ClResult == CL_INVALID_BUFFER_SIZE) { - return UR_RESULT_ERROR_INVALID_USM_SIZE; - } - CL_RETURN_ON_FAILURE(ClResult); + ur_platform_handle_t Platform = hDevice->Platform; + clSharedMemAllocINTEL_fn clSharedMemAlloc = + Platform->ExtFuncPtr->clSharedMemAllocINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clSharedMemAlloc, + cl_ext::SharedMemAllocName, + "cl_intel_unified_shared_memory")); + + cl_int ClResult = CL_SUCCESS; + Ptr = clSharedMemAlloc( + CLContext, hDevice->get(), + AllocProperties.empty() ? 
nullptr : AllocProperties.data(), size, + Alignment, cl_adapter::cast(&ClResult)); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; } + CL_RETURN_ON_FAILURE(ClResult); *ppMem = Ptr; @@ -215,19 +212,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t Context, // Use a blocking free to avoid issues with indirect access from kernels that // might be still running. - clMemBlockingFreeINTEL_fn FuncPtr = nullptr; - cl_context CLContext = Context->get(); - ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, - cl_ext::MemBlockingFreeName, &FuncPtr); - - if (FuncPtr) { - RetVal = mapCLErrorToUR(FuncPtr(CLContext, pMem)); - } - - return RetVal; + ur_platform_handle_t Platform = Context->getPlatform(); + clMemBlockingFreeINTEL_fn clMemBlockingFree = + Platform->ExtFuncPtr->clMemBlockingFreeINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clMemBlockingFree, + cl_ext::MemBlockingFreeName, + "cl_intel_unified_shared_memory")); + + return mapCLErrorToUR(clMemBlockingFree(CLContext, pMem)); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( @@ -236,13 +229,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel cl_context CLContext = hQueue->Context->get(); - + ur_platform_handle_t Platform = hQueue->Context->getPlatform(); if (patternSize <= 128) { - clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; + clEnqueueMemFillINTEL_fn EnqueueMemFill = + Platform->ExtFuncPtr->clEnqueueMemFillINTELCache; UR_RETURN_ON_FAILURE( - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, - cl_ext::EnqueueMemFillName, &EnqueueMemFill)); + Platform->getExtFunc(&EnqueueMemFill, cl_ext::EnqueueMemFillName, + "cl_intel_unified_shared_memory")); + cl_event 
Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { @@ -268,20 +262,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( // OpenCL only supports pattern sizes as large as the largest CL type // (double16/long16 - 128 bytes), anything larger we need to do on the host // side and copy it into the target allocation. - clHostMemAllocINTEL_fn HostMemAlloc = nullptr; - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::HostMemAllocName, &HostMemAlloc)); - - clEnqueueMemcpyINTEL_fn USMMemcpy = nullptr; - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - cl_ext::EnqueueMemcpyName, &USMMemcpy)); - - clMemBlockingFreeINTEL_fn USMFree = nullptr; - UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, - cl_ext::MemBlockingFreeName, &USMFree)); + clHostMemAllocINTEL_fn HostMemAlloc = + Platform->ExtFuncPtr->clHostMemAllocINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&HostMemAlloc, + cl_ext::HostMemAllocName, + "cl_intel_unified_shared_memory")); + + clEnqueueMemcpyINTEL_fn USMMemcpy = + Platform->ExtFuncPtr->clEnqueueMemcpyINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &USMMemcpy, cl_ext::EnqueueMemcpyName, "cl_intel_unified_shared_memory")); + + clMemBlockingFreeINTEL_fn USMFree = + Platform->ExtFuncPtr->clMemBlockingFreeINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc( + &USMFree, cl_ext::MemBlockingFreeName, "cl_intel_unified_shared_memory")); cl_int ClErr = CL_SUCCESS; auto HostBuffer = static_cast( @@ -360,32 +355,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext = hQueue->Context->get(); - - clEnqueueMemcpyINTEL_fn FuncPtr = 
nullptr; - ur_result_t RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - cl_ext::EnqueueMemcpyName, &FuncPtr); + ur_platform_handle_t Platform = hQueue->Context->getPlatform(); + clEnqueueMemcpyINTEL_fn clEnqueueMemcpy = + Platform->ExtFuncPtr->clEnqueueMemcpyINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clEnqueueMemcpy, + cl_ext::EnqueueMemcpyName, + "cl_intel_unified_shared_memory")); - if (FuncPtr) { - cl_event Event; - std::vector CLWaitEvents(numEventsInWaitList); - for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); - } - RetVal = mapCLErrorToUR(FuncPtr(hQueue->get(), blocking, pDst, pSrc, size, - numEventsInWaitList, CLWaitEvents.data(), - &Event)); - if (phEvent) { - try { - auto UREvent = std::make_unique( - Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } + ur_result_t RetVal = mapCLErrorToUR( + clEnqueueMemcpy(hQueue->get(), blocking, pDst, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; } } @@ -495,16 +488,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const void *pSrc, size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = hQueue->Context->get(); - clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, - cl_ext::EnqueueMemcpyName, &FuncPtr); - - if (!FuncPtr) { - return RetVal; - } + ur_platform_handle_t Platform = hQueue->Context->getPlatform(); + clEnqueueMemcpyINTEL_fn clEnqueueMemcpy = + Platform->ExtFuncPtr->clEnqueueMemcpyINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&clEnqueueMemcpy, + cl_ext::EnqueueMemcpyName, + "cl_intel_unified_shared_memory")); std::vector Events(height); for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { @@ -513,11 +503,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( for (uint32_t i = 0; i < numEventsInWaitList; i++) { CLWaitEvents[i] = phEventWaitList[i]->get(); } - auto ClResult = - FuncPtr(hQueue->get(), false, - static_cast(pDst) + dstPitch * HeightIndex, - static_cast(pSrc) + srcPitch * HeightIndex, - width, numEventsInWaitList, CLWaitEvents.data(), &Event); + auto ClResult = clEnqueueMemcpy( + hQueue->get(), false, + static_cast(pDst) + dstPitch * HeightIndex, + static_cast(pSrc) + srcPitch * HeightIndex, width, + numEventsInWaitList, CLWaitEvents.data(), &Event); Events[HeightIndex] = Event; if (ClResult != CL_SUCCESS) { for (const auto &E : Events) { @@ -572,11 +562,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( ur_context_handle_t Context, const void *pMem, ur_usm_alloc_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; - cl_context CLContext = Context->get(); - 
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, - cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); + ur_platform_handle_t Platform = Context->getPlatform(); + clGetMemAllocInfoINTEL_fn GetMemAllocInfo = + Platform->ExtFuncPtr->clGetMemAllocInfoINTELCache; + UR_RETURN_ON_FAILURE(Platform->getExtFunc(&GetMemAllocInfo, + cl_ext::GetMemAllocInfoName, + "cl_intel_unified_shared_memory")); cl_mem_info_intel PropNameCL; switch (propName) {