From ce980d4a5590b5984bb92fa0ee2b60d7626e1441 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 8 Feb 2024 17:36:06 +0000 Subject: [PATCH] [EXP][Command-buffer] OpenCL kernel command update Implement the API for updating the kernel commands in a command-buffer defined by https://github.com/oneapi-src/unified-runtime/pull/1089 for the OpenCL adapter. This depends on support for the [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) extension. Tested on Intel GPU OpenCL implementations with the [command-buffer emulation layer](https://github.com/bashbaug/SimpleOpenCLSamples/tree/main/layers/10_cmdbufemu). --- source/adapters/opencl/command_buffer.cpp | 235 ++++++++++++++++-- source/adapters/opencl/command_buffer.hpp | 27 +- source/adapters/opencl/common.cpp | 30 +++ source/adapters/opencl/common.hpp | 9 + source/adapters/opencl/device.cpp | 6 +- .../buffer_fill_kernel_update.cpp | 26 +- .../buffer_saxpy_kernel_update.cpp | 29 ++- .../conformance/exp_command_buffer/fixtures.h | 4 + .../exp_command_buffer/invalid_update.cpp | 4 + .../exp_command_buffer/ndrange_update.cpp | 12 + .../usm_fill_kernel_update.cpp | 8 +- 11 files changed, 362 insertions(+), 28 deletions(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 88c661b4ae..cd53e75332 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -13,7 +13,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, - [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, ur_exp_command_buffer_handle_t *phCommandBuffer) { ur_queue_handle_t Queue = nullptr; @@ -29,13 +29,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( if (!clCreateCommandBufferKHR || Res 
!= CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; + bool IsUpdatable = + pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; + + bool SupportsUpdate = false; + cl_device_id CLDevice = cl_adapter::cast(hDevice); + CL_RETURN_ON_FAILURE( + deviceSupportsURCommandBufferKernelUpdate(CLDevice, SupportsUpdate)); + + bool Updatable = IsUpdatable && SupportsUpdate; + + cl_command_buffer_properties_khr Properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, + Updatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0}; auto CLCommandBuffer = clCreateCommandBufferKHR( - 1, cl_adapter::cast(&Queue), nullptr, &Res); + 1, cl_adapter::cast(&Queue), Properties, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); try { auto URCommandBuffer = std::make_unique( - Queue, hContext, CLCommandBuffer); + Queue, hContext, CLCommandBuffer, Updatable); *phCommandBuffer = URCommandBuffer.release(); } catch (...) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -95,6 +108,7 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { CL_RETURN_ON_FAILURE( clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); + hCommandBuffer->Finalized = true; return UR_RESULT_SUCCESS; } @@ -105,7 +119,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, - ur_exp_command_buffer_command_handle_t *) { + ur_exp_command_buffer_command_handle_t *phCommandHandle) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; @@ -117,11 +131,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS) return UR_RESULT_ERROR_INVALID_OPERATION; + cl_mutable_command_khr CommandHandle = nullptr; + cl_mutable_command_khr *OutCommandHandle = + hCommandBuffer->Updatable ? 
&CommandHandle : nullptr; + + cl_ndrange_kernel_command_properties_khr UpdateProperties[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR, + 0}; + + cl_ndrange_kernel_command_properties_khr *Properties = + hCommandBuffer->Updatable ? UpdateProperties : nullptr; CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, + hCommandBuffer->CLCommandBuffer, nullptr, Properties, cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, nullptr)); + pSyncPointWaitList, pSyncPoint, OutCommandHandle)); + + try { + auto URCommandHandle = + std::make_unique( + hCommandBuffer, CommandHandle, workDim); + *phCommandHandle = URCommandHandle.release(); + hCommandBuffer->CommandHandles.push_back(*phCommandHandle); + } catch (...) 
{ + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } return UR_RESULT_SUCCESS; } @@ -360,19 +398,180 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + hCommand->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + if (hCommand->decrementReferenceCount() == 0) { + // TODO + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, [[maybe_unused]] const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clUpdateMutableCommandsKHRCache, + cl_ext::UpdateMutableCommandsName, &clUpdateMutableCommandsKHR); + + if (!clUpdateMutableCommandsKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + if (!hCommandBuffer->Finalized || !hCommandBuffer->Updatable) + return UR_RESULT_ERROR_INVALID_OPERATION; + + // Find the CL execution info to update + uint32_t NumExecInfos = pUpdateKernelLaunch->numNewExecInfos; + const ur_exp_command_buffer_update_exec_info_desc_t *ExecInfoList = + pUpdateKernelLaunch->pNewExecInfoList; + std::vector CLExecInfos; + for (uint32_t i = 0; i < NumExecInfos; i++) { + const ur_exp_command_buffer_update_exec_info_desc_t &URExecInfo = + ExecInfoList[i]; + + if 
(URExecInfo.propName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS) { + cl_bool TrueVal = CL_TRUE; + cl_mutable_dispatch_exec_info_khr CLExecInfo; + CLExecInfo.param_value_size = sizeof(cl_bool); + CLExecInfo.param_value = &TrueVal; + CLExecInfo.param_name = CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL; + CLExecInfos.push_back(CLExecInfo); + + CLExecInfo.param_name = CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL; + CLExecInfos.push_back(CLExecInfo); + + CLExecInfo.param_name = CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL; + CLExecInfos.push_back(CLExecInfo); + } else if (URExecInfo.propName == UR_KERNEL_EXEC_INFO_USM_PTRS) { + cl_mutable_dispatch_exec_info_khr CLExecInfo{}; + CLExecInfo.param_value_size = URExecInfo.propSize; + CLExecInfo.param_value = URExecInfo.pNewExecInfo; + CLExecInfo.param_name = CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL; + CLExecInfos.push_back(CLExecInfo); + } else if (URExecInfo.propName != UR_KERNEL_EXEC_INFO_CACHE_CONFIG) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + + // Find the CL USM pointer arguments to the kernel + // WARNING - This relies on USM and SVM using the same implementation, + // which is not guaranteed. 
+ // See https://github.com/KhronosGroup/OpenCL-Docs/issues/843 + uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = + pUpdateKernelLaunch->pNewPointerArgList; + std::vector CLUSMArgs(NumPointerArgs); + for (uint32_t i = 0; i < NumPointerArgs; i++) { + const ur_exp_command_buffer_update_pointer_arg_desc_t &URPointerArg = + ArgPointerList[i]; + cl_mutable_dispatch_arg_khr &USMArg = CLUSMArgs[i]; + USMArg.arg_index = URPointerArg.argIndex; + USMArg.arg_value = *(void **)URPointerArg.pNewPointerArg; + } + + uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = + pUpdateKernelLaunch->pNewMemObjArgList; + uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = + pUpdateKernelLaunch->pNewValueArgList; + + std::vector CLArgs; + for (uint32_t i = 0; i < NumMemobjArgs; i++) { + const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg = + ArgMemobjList[i]; + cl_mutable_dispatch_arg_khr CLArg{ + URMemObjArg.argIndex, // arg_index + sizeof(cl_mem), // arg_size + cl_adapter::cast( + &URMemObjArg.hNewMemObjArg) // arg_value + }; + + CLArgs.push_back(CLArg); + } + + for (uint32_t i = 0; i < NumValueArgs; i++) { + const ur_exp_command_buffer_update_value_arg_desc_t &URValueArg = + ArgValueList[i]; + cl_mutable_dispatch_arg_khr CLArg{ + URValueArg.argIndex, // arg_index + URValueArg.argSize, // arg_size + URValueArg.pNewValueArg // arg_value + }; + CLArgs.push_back(CLArg); + } + + const cl_uint NewWorkDim = pUpdateKernelLaunch->newWorkDim; + cl_uint &CLWorkDim = hCommand->WorkDim; + if (NewWorkDim != 0 && NewWorkDim != CLWorkDim) { + // Limitation of the cl_khr_command_buffer_mutable_dispatch specification + // that it is an error to change the number of ND-Range dimensions. 
+ // https://github.com/KhronosGroup/OpenCL-Docs/issues/1057 + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + const size_t CopySize = sizeof(size_t) * CLWorkDim; + std::vector CLGlobalWorkOffset, CLGlobalWorkSize, CLLocalWorkSize; + + if (auto GlobalWorkOffsetPtr = pUpdateKernelLaunch->pNewGlobalWorkOffset) { + CLGlobalWorkOffset.resize(CLWorkDim); + std::memcpy(CLGlobalWorkOffset.data(), GlobalWorkOffsetPtr, CopySize); + if (CLWorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - CLWorkDim); + std::memset(CLGlobalWorkOffset.data() + CLWorkDim, 0, ZeroSize); + } + } + + if (auto GlobalWorkSizePtr = pUpdateKernelLaunch->pNewGlobalWorkSize) { + CLGlobalWorkSize.resize(CLWorkDim); + std::memcpy(CLGlobalWorkSize.data(), GlobalWorkSizePtr, CopySize); + if (CLWorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - CLWorkDim); + std::memset(CLGlobalWorkSize.data() + CLWorkDim, 0, ZeroSize); + } + } + + if (auto LocalWorkSizePtr = pUpdateKernelLaunch->pNewLocalWorkSize) { + CLLocalWorkSize.resize(CLWorkDim); + std::memcpy(CLLocalWorkSize.data(), LocalWorkSizePtr, CopySize); + if (CLWorkDim < 3) { + const size_t ZeroSize = sizeof(size_t) * (3 - CLWorkDim); + std::memset(CLLocalWorkSize.data() + CLWorkDim, 0, ZeroSize); + } + } + + cl_mutable_command_khr command = + cl_adapter::cast(hCommand->CLMutableCommand); + cl_mutable_dispatch_config_khr dispatch_config = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + static_cast(CLArgs.size()), // num_args + static_cast(CLUSMArgs.size()), // num_svm_args + static_cast(CLExecInfos.size()), // num_exec_infos + CLWorkDim, // work_dim + CLArgs.data(), // arg_list + CLUSMArgs.data(), // arg_svm_list + CLExecInfos.data(), // exec_info_list + CLGlobalWorkOffset.data(), // global_work_offset + CLGlobalWorkSize.data(), // global_work_size + CLLocalWorkSize.data(), // local_work_size + }; + cl_mutable_base_config_khr config = { + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, 
&dispatch_config}; + CL_RETURN_ON_FAILURE( + clUpdateMutableCommandsKHR(hCommandBuffer->CLCommandBuffer, &config)); + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( @@ -415,9 +614,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( - [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand, - [[maybe_unused]] ur_exp_command_buffer_command_info_t propName, - [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue, - [[maybe_unused]] size_t *pPropSizeRet) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT: + return ReturnValue(hCommand->getReferenceCount()); + default: + assert(!"Command-buffer command info request not implemented"); + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index d80f29594b..cee863ec51 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -11,14 +11,37 @@ #include #include +// Handle to a kernel command. 
+struct ur_exp_command_buffer_command_handle_t_ { + ur_exp_command_buffer_handle_t hCommandBuffer; + cl_mutable_command_khr CLMutableCommand; + cl_uint WorkDim; + std::atomic_uint32_t RefCount; + + ur_exp_command_buffer_command_handle_t_( + ur_exp_command_buffer_handle_t hCommandBuffer, + cl_mutable_command_khr CLMutableCommand, cl_uint WorkDim) + : hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand), + WorkDim(WorkDim), RefCount{0} {} + + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } + uint32_t decrementReferenceCount() noexcept { return --RefCount; } + uint32_t getReferenceCount() const noexcept { return RefCount; } +}; + struct ur_exp_command_buffer_handle_t_ { ur_queue_handle_t hInternalQueue; ur_context_handle_t hContext; cl_command_buffer_khr CLCommandBuffer; + bool Updatable; + bool Finalized; + std::vector CommandHandles; ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue, ur_context_handle_t hContext, - cl_command_buffer_khr CLCommandBuffer) + cl_command_buffer_khr CLCommandBuffer, + bool Updatable) : hInternalQueue(hQueue), hContext(hContext), - CLCommandBuffer(CLCommandBuffer) {} + CLCommandBuffer(CLCommandBuffer), Updatable(Updatable), + Finalized(false) {} }; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 4fe8bed408..9051cf7024 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -101,3 +101,33 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { *NativeHandle = reinterpret_cast(URObj); return UR_RESULT_SUCCESS; } + +cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, + bool &Result) { + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + std::string SupportedExtensions(ExtStr.c_str()); + 
if (ExtStr.find("cl_khr_command_buffer_mutable_dispatch") == + std::string::npos) { + Result = false; + return CL_SUCCESS; + } + + cl_mutable_dispatch_fields_khr mutable_capabilities; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + Dev, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr)); + const cl_mutable_dispatch_fields_khr required_caps = + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR | + CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | + CL_MUTABLE_DISPATCH_EXEC_INFO_KHR; + Result = (mutable_capabilities & required_caps) == required_caps; + return CL_SUCCESS; +} diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0667cd3d17..bdb94c3feb 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -215,6 +215,7 @@ CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; +CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR"; #undef CONSTFIX @@ -305,6 +306,10 @@ using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); +using clUpdateMutableCommandsKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr *mutable_config); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; @@ -344,6 +349,7 @@ struct ExtFuncPtrCacheT { FuncPtrCache clCommandFillBufferKHRCache; FuncPtrCache clEnqueueCommandBufferKHRCache; FuncPtrCache clGetCommandBufferInfoKHRCache; + FuncPtrCache clUpdateMutableCommandsKHRCache; }; // A raw pointer 
is used here since the lifetime of this map has to be tied to // piTeardown to avoid issues with static destruction order (a user application @@ -414,3 +420,6 @@ static ur_result_t getExtFuncFromContext(cl_context Context, ur_result_t mapCLErrorToUR(cl_int Result); ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle); + +cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, + bool &Result); diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 115b9b2e09..7792839d29 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -967,7 +967,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, std::string::npos); } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - return ReturnValue(false); + cl_device_id Dev = cl_adapter::cast(hDevice); + bool Supported = false; + CL_RETURN_ON_FAILURE( + deviceSupportsURCommandBufferKernelUpdate(Dev, Supported)); + return ReturnValue(Supported); } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp index ea5295dc6b..6bb1b51568 100644 --- a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -14,10 +14,30 @@ struct BufferFillCommandTest UUR_RETURN_ON_FATAL_FAILURE( urUpdatableCommandBufferExpExecutionTest::SetUp()); - // First argument is buffer to fill (will also be hidden accessor arg) - AddBuffer1DArg(sizeof(val) * global_size, &buffer); + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + sizeof(val) * global_size, nullptr, + &buffer)); + + // TODO - Enable single code path after https://github.com/oneapi-src/unified-runtime/pull/1176 + // is merged + if (backend != UR_PLATFORM_BACKEND_OPENCL) { + // First argument is buffer to fill + 
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffer)); + } else { + // First argument is buffer to fill + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 0, sizeof(buffer), + nullptr, &buffer)); + } + // second arg is hidden accessor + struct { + size_t offsets[1] = {0}; + } accessor; + ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr, + &accessor)); + // Second argument is scalar to fill with. - AddPodArg(val); + ASSERT_SUCCESS( + urKernelSetArgValue(kernel, 2, sizeof(val), nullptr, &val)); // Append kernel command to command-buffer and close command-buffer ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp index 879b3a9bc6..3c9b46bc6f 100644 --- a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -29,8 +29,17 @@ struct BufferSaxpyKernelTest 0, nullptr, nullptr)); } - // Index 0 is output buffer - ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0])); + // TODO - Enable single code path once https://github.com/oneapi-src/unified-runtime/pull/1176 + // is merged + if (backend != UR_PLATFORM_BACKEND_OPENCL) { + // Index 0 is output buffer + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0])); + } else { + // Index 0 is output buffer + ASSERT_SUCCESS(urKernelSetArgValue( + kernel, 0, sizeof(ur_mem_handle_t), nullptr, &buffers[0])); + } // Index 1 is output accessor struct { size_t offsets[1] = {0}; @@ -41,13 +50,25 @@ struct BufferSaxpyKernelTest // Index 2 is A ASSERT_SUCCESS(urKernelSetArgValue(kernel, 2, sizeof(A), nullptr, &A)); // Index 3 is X buffer - ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1])); + if (backend != UR_PLATFORM_BACKEND_OPENCL) { + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1])); + } else { + 
ASSERT_SUCCESS(urKernelSetArgValue( + kernel, 3, sizeof(ur_mem_handle_t), nullptr, &buffers[1])); + } // Index 4 is X buffer accessor ASSERT_SUCCESS(urKernelSetArgValue(kernel, 4, sizeof(accessor), nullptr, &accessor)); // Index 5 is Y buffer - ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2])); + if (backend != UR_PLATFORM_BACKEND_OPENCL) { + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2])); + } else { + ASSERT_SUCCESS(urKernelSetArgValue( + kernel, 5, sizeof(ur_mem_handle_t), nullptr, &buffers[2])); + } // Index 6 is Y buffer accessor ASSERT_SUCCESS(urKernelSetArgValue(kernel, 6, sizeof(accessor), nullptr, diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 4e9bff35f9..cbf441db2f 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -59,6 +59,9 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + size_t returned_size; ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0, nullptr, &returned_size)); @@ -97,6 +100,7 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest { ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; ur_bool_t updatable_command_buffer_support = false; + ur_platform_backend_t backend{}; }; struct urUpdatableCommandBufferExpExecutionTest diff --git a/test/conformance/exp_command_buffer/invalid_update.cpp b/test/conformance/exp_command_buffer/invalid_update.cpp index 00cf04ea85..dbf0534437 100644 --- a/test/conformance/exp_command_buffer/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/invalid_update.cpp @@ -41,6 +41,10 @@ struct InvalidUpdateTest } void TearDown() override { + // Workaround an issue with the OpenCL 
adapter implementing urUSMFree + // using a blocking free, which can hang + EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + if (shared_ptr) { EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); } diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp index e5631f9176..bd3781c4a4 100644 --- a/test/conformance/exp_command_buffer/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -155,6 +155,12 @@ TEST_P(NDRangeUpdateTest, Update3D) { // Update the kernel work dimensions to 2, and update global size, local size, // and global offset to new values. TEST_P(NDRangeUpdateTest, Update2D) { + if (backend == UR_PLATFORM_BACKEND_OPENCL) { + // OpenCL cl_khr_command_buffer_mutable_dispatch does not support + // updating the work dimension. + GTEST_SKIP(); + } + // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); @@ -205,6 +211,12 @@ TEST_P(NDRangeUpdateTest, Update2D) { // Update the kernel work dimensions to 1, and check that previously // set global size, local size, and global offset update accordingly. TEST_P(NDRangeUpdateTest, Update1D) { + if (backend == UR_PLATFORM_BACKEND_OPENCL) { + // OpenCL cl_khr_command_buffer_mutable_dispatch does not support + // updating the work dimension. 
+ GTEST_SKIP(); + } + // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp index 7e6cab6ee3..20e2bf9c16 100644 --- a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp @@ -154,7 +154,7 @@ TEST_P(USMFillCommandTest, UpdateExecInfo) { ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3]; // Update direct access flag - bool indirect_access = false; + bool indirect_access = true; new_exec_info_descs[0] = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype nullptr, // pNext @@ -179,14 +179,14 @@ TEST_P(USMFillCommandTest, UpdateExecInfo) { ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, allocation_size, &new_shared_ptr)); ASSERT_NE(new_shared_ptr, nullptr); - void *pointers = {new_shared_ptr}; + void *pointers[1] = {new_shared_ptr}; new_exec_info_descs[2] = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype nullptr, // pNext UR_KERNEL_EXEC_INFO_USM_PTRS, // propName sizeof(pointers), // propSize - nullptr, // pProperties - &pointers, // pPropValue + nullptr, // pProperties + pointers, // pPropValue }; ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {