From 683f0da46ae547b2eb3a3892af8b6e5d5de90482 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Tue, 17 Oct 2023 12:39:39 +0100 Subject: [PATCH] [SYCL][OpenCL] Add Command Buffer extension to OpenCL adapter. --- source/adapters/opencl/command_buffer.cpp | 285 ++++++++++++++++------ source/adapters/opencl/command_buffer.hpp | 13 +- source/adapters/opencl/common.cpp | 4 + source/adapters/opencl/common.hpp | 71 ++++++ source/adapters/opencl/device.cpp | 17 +- source/adapters/opencl/enqueue.cpp | 8 +- 6 files changed, 315 insertions(+), 83 deletions(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 121a991cbd..06154518aa 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -1,71 +1,128 @@ //===--------- command_buffer.cpp - OpenCL Adapter ---------------------===// // -// Copyright (C) 2023 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===----------------------------------------------------------------------===// +//===-----------------------------------------------------------------===// #include "command_buffer.hpp" #include "common.hpp" -/// Stub implementations of UR experimental feature command-buffers - UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, + ur_context_handle_t hContext, ur_device_handle_t hDevice, [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - [[maybe_unused]] ur_exp_command_buffer_handle_t *phCommandBuffer) { + ur_exp_command_buffer_handle_t *phCommandBuffer) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_queue_handle_t Queue = nullptr; + ur_result_t Result = urQueueCreate(hContext, hDevice, nullptr, &Queue); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + + cl_context CLContext = cl_adapter::cast(hContext); + cl_ext::clCreateCommandBufferKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache, + cl_ext::CreateCommandBufferName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + auto CLCommandBuffer = + F(1, cl_adapter::cast(&Queue), nullptr, &Res); + CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); + + try { + auto URCommandBuffer = std::make_unique( + Queue, hContext, CLCommandBuffer); + *phCommandBuffer = URCommandBuffer.release(); + } catch (...) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } + + CL_RETURN_ON_FAILURE(Res); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clRetainCommandBufferKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, + cl_ext::RetainCommandBufferName, &F); - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(F(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + ur_result_t Result = urQueueRelease(hCommandBuffer->hInternalQueue); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clReleaseCommandBufferKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, + cl_ext::ReleaseCommandBufferName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(F(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clFinalizeCommandBufferKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache, + cl_ext::FinalizeCommandBufferName, &F); - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(F(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_kernel_handle_t hKernel, - [[maybe_unused]] uint32_t workDim, - [[maybe_unused]] const size_t *pGlobalWorkOffset, - [[maybe_unused]] const size_t *pGlobalWorkSize, - [[maybe_unused]] const size_t *pLocalWorkSize, - [[maybe_unused]] uint32_t numSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandNDRangeKernelKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache, + cl_ext::CommandNRRangeKernelName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE( + F(hCommandBuffer->CLCommandBuffer, + cl_adapter::cast(hCommandBuffer->hInternalQueue), + nullptr, cl_adapter::cast(hKernel), workDim, + pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, [[maybe_unused]] void *pDst, [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t size, @@ -74,43 +131,81 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " + cl_adapter::die("Experimental Command-buffer entry point is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_mem_handle_t hSrcMem, - [[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset, - [[maybe_unused]] size_t dstOffset, [[maybe_unused]] size_t size, + [[maybe_unused]] void *pMemory, [[maybe_unused]] const void *pPattern, + [[maybe_unused]] size_t patternSize, [[maybe_unused]] size_t size, [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " + cl_adapter::die("Experimental Command-buffer entry point is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandCopyBufferKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, + cl_ext::CommandCopyBufferName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE( + F(hCommandBuffer->CLCommandBuffer, + cl_adapter::cast(hCommandBuffer->hInternalQueue), + cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_mem_handle_t hSrcMem, - [[maybe_unused]] ur_mem_handle_t hDstMem, - [[maybe_unused]] ur_rect_offset_t srcOrigin, - [[maybe_unused]] ur_rect_offset_t dstOrigin, - [[maybe_unused]] ur_rect_region_t region, - [[maybe_unused]] size_t srcRowPitch, [[maybe_unused]] size_t srcSlicePitch, - [[maybe_unused]] size_t dstRowPitch, [[maybe_unused]] size_t dstSlicePitch, - [[maybe_unused]] uint32_t numSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + size_t OpenCLOriginRect[3]{srcOrigin.x, srcOrigin.y, srcOrigin.z}; + size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; + size_t OpenCLRegion[3]{region.width, region.height, region.depth}; + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandCopyBufferRectKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache, + cl_ext::CommandCopyBufferRectName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE( + F(hCommandBuffer->CLCommandBuffer, + cl_adapter::cast(hCommandBuffer->hInternalQueue), + cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, + srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; } UR_APIEXPORT @@ -123,7 +218,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " + cl_adapter::die("Experimental Command-buffer entry point is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -138,7 +233,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " + cl_adapter::die("Experimental Command-buffer entry point is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -159,7 +254,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " + cl_adapter::die("Experimental Command-buffer entry point is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -180,19 +275,57 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " + cl_adapter::die("Experimental Command-buffer entry point is not " "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, size_t offset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandFillBufferKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, + cl_ext::CommandFillBufferName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE( + F(hCommandBuffer->CLCommandBuffer, + cl_adapter::cast(hCommandBuffer->hInternalQueue), + cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_queue_handle_t hQueue, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clEnqueueCommandBufferKHR_fn F = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache, + cl_ext::EnqueueCommandBufferName, &F); + + if (!F || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + uint32_t numQueues = (hQueue) ? 1 : 0; + + CL_RETURN_ON_FAILURE(F(numQueues, + cl_adapter::cast(&hQueue), + hCommandBuffer->CLCommandBuffer, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index 7ab145c53d..d80f29594b 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -8,8 +8,17 @@ // //===----------------------------------------------------------------------===// +#include #include -/// Stub implementation of command-buffers for OpenCL +struct ur_exp_command_buffer_handle_t_ { + ur_queue_handle_t hInternalQueue; + ur_context_handle_t hContext; + cl_command_buffer_khr CLCommandBuffer; -struct ur_exp_command_buffer_handle_t_ {}; + ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue, + ur_context_handle_t hContext, + cl_command_buffer_khr CLCommandBuffer) + : hInternalQueue(hQueue), hContext(hContext), + CLCommandBuffer(CLCommandBuffer) {} +}; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 2b0e7b6a27..67ecd6312b 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -60,6 +60,10 @@ ur_result_t mapCLErrorToUR(cl_int Result) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; case CL_INVALID_MEM_OBJECT: return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + case CL_INVALID_COMMAND_BUFFER_KHR: + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP; + case CL_INVALID_SYNC_POINT_WAIT_LIST_KHR: + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; default: return UR_RESULT_ERROR_UNKNOWN; } diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index f78710d0df..7beff7d4af 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -192,6 +192,16 @@ CONSTFIX char EnqueueReadGlobalVariableName[] = // Names of host pipe functions queried from OpenCL CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; +// Names of command buffer functions queried from OpenCL +CONSTFIX char CreateCommandBufferName[] = "clCreateCommandBufferKHR"; +CONSTFIX char RetainCommandBufferName[] = "clRetainCommandBufferKHR"; +CONSTFIX char ReleaseCommandBufferName[] = "clReleaseCommandBufferKHR"; +CONSTFIX char FinalizeCommandBufferName[] = "clFinalizeCommandBufferKHR"; +CONSTFIX char CommandNRRangeKernelName[] = "clCommandNDRangeKernelKHR"; +CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR"; +CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; +CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; +CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; #undef CONSTFIX @@ -226,6 +236,58 @@ cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, cl_uint num_events_in_waitlist, const cl_event *events_waitlist, cl_event *event); +using clCreateCommandBufferKHR_fn = CL_API_ENTRY cl_command_buffer_khr( + CL_API_CALL *)(cl_uint num_queues, const cl_command_queue *queues, + const cl_command_buffer_properties_khr *properties, + cl_int *errcode_ret); + +using clRetainCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clReleaseCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clFinalizeCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clCommandNDRangeKernelKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr *properties, + cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, + const size_t *global_work_size, const size_t *local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandCopyBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, + size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandCopyBufferRectKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, + const size_t *dst_origin, const size_t *region, size_t src_row_pitch, + size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandFillBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, + size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clEnqueueCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; @@ -255,6 +317,15 @@ struct ExtFuncPtrCacheT { FuncPtrCache clEnqueueWriteHostPipeINTELCache; FuncPtrCache clSetProgramSpecializationConstantCache; + FuncPtrCache clCreateCommandBufferKHRCache; + FuncPtrCache clRetainCommandBufferKHRCache; + FuncPtrCache clReleaseCommandBufferKHRCache; + FuncPtrCache clFinalizeCommandBufferKHRCache; + FuncPtrCache clCommandNDRangeKernelKHRCache; + FuncPtrCache clCommandCopyBufferKHRCache; + FuncPtrCache clCommandCopyBufferRectKHRCache; + FuncPtrCache clCommandFillBufferKHRCache; + FuncPtrCache clEnqueueCommandBufferKHRCache; }; // A raw pointer is used here since the lifetime of this map has to be tied to // piTeardown to avoid issues with static destruction order (a user application diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 3fc6f5d491..f51502902f 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -859,7 +859,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_PROFILE: case UR_DEVICE_INFO_VERSION: case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: - case UR_DEVICE_INFO_EXTENSIONS: case UR_DEVICE_INFO_BUILT_IN_KERNELS: case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: @@ -881,6 +880,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_EXTENSIONS: { + cl_device_id Dev = cl_adapter::cast(hDevice); + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + std::string SupportedExtensions(ExtStr.c_str()); + if (ExtStr.find("cl_khr_command_buffer") != std::string::npos) { + SupportedExtensions += " ur_exp_command_buffer"; + } + return ReturnValue(SupportedExtensions.c_str()); + } /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU * EU device-specific information extensions. Some of the queries are * enabled by cl_intel_device_attribute_query extension, but it's not yet in diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 29c5ad672e..5f41878182 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -350,9 +350,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, cl_ext::EnqueueReadHostPipeName, &FuncPtr); @@ -382,9 +382,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, cl_ext::EnqueueWriteHostPipeName, &FuncPtr);