From 3ceb6d931fd36d2433f202f8010261b20d4e18e8 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 22 Feb 2024 12:26:17 +0000 Subject: [PATCH] Error if number of dimensions changes during update See https://github.com/KhronosGroup/OpenCL-Docs/issues/1057 for discussions as to why we shouldn't enable changing to number of dimensions in an update. --- include/ur_api.h | 1 + scripts/core/exp-command-buffer.yml | 1 + source/adapters/cuda/command_buffer.cpp | 7 + source/adapters/opencl/command_buffer.cpp | 120 ++++++++++-------- source/loader/ur_libapi.cpp | 1 + source/ur_api.cpp | 1 + .../buffer_fill_kernel_update.cpp | 2 - .../buffer_saxpy_kernel_update.cpp | 2 - .../exp_command_buffer/ndrange_update.cpp | 79 +++++++----- 9 files changed, 128 insertions(+), 86 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 62534e4285..6e14460786 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -8532,6 +8532,7 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernelLaunch->newWorkDim` is different from the work-dim used on creation of `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 1f8934d4b2..150529c5a2 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -926,6 +926,7 @@ returns: - $X_RESULT_ERROR_INVALID_OPERATION: - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." + - "If `pUpdateKernelLaunch->newWorkDim` is different from the work-dim used on creation of `hCommand`." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 3f7970df53..aef2f13498 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -867,6 +867,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } + // Error if work dim changes + if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { + if (NewWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + } + // Kernel corresponding to the command to update ur_kernel_handle_t Kernel = hCommand->Kernel; diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index f5ec767a8c..ae41d805be 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -437,30 +437,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return commandHandleReleaseInternal(hCommand); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( - ur_exp_command_buffer_command_handle_t hCommand, +namespace { +ur_result_t updateKernelExecInfo( + std::vector &CLExecInfos, const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { - - ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); - cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; - cl_int Res = - cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clUpdateMutableCommandsKHRCache, - cl_ext::UpdateMutableCommandsName, &clUpdateMutableCommandsKHR); - - if (!clUpdateMutableCommandsKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; - - if (!hCommandBuffer->IsFinalized || !hCommandBuffer->IsUpdatable) - return UR_RESULT_ERROR_INVALID_OPERATION; - - // Find the CL execution info to update const uint32_t NumExecInfos = pUpdateKernelLaunch->numNewExecInfos; const ur_exp_command_buffer_update_exec_info_desc_t *ExecInfoList = pUpdateKernelLaunch->pNewExecInfoList; - std::vector CLExecInfos; for (uint32_t i = 0; i < NumExecInfos; i++) { const ur_exp_command_buffer_update_exec_info_desc_t &URExecInfo = ExecInfoList[i]; @@ -488,15 +472,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } } + return UR_RESULT_SUCCESS; +} + +void updateKernelPointerArgs( + std::vector &CLUSMArgs, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { - // Find the CL USM pointer arguments to the kernel. // WARNING - This relies on USM and SVM using the same implementation, // which is not guaranteed. // See https://github.com/KhronosGroup/OpenCL-Docs/issues/843 const uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = pUpdateKernelLaunch->pNewPointerArgList; - std::vector CLUSMArgs(NumPointerArgs); + + CLUSMArgs.resize(NumPointerArgs); for (uint32_t i = 0; i < NumPointerArgs; i++) { const ur_exp_command_buffer_update_pointer_arg_desc_t &URPointerArg = ArgPointerList[i]; @@ -504,8 +495,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( USMArg.arg_index = URPointerArg.argIndex; USMArg.arg_value = *(void *const *)URPointerArg.pNewPointerArg; } +} - // Find the memory object and scalar arguments to the kernel. +void updateKernelArgs(std::vector &CLArgs, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { const uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = pUpdateKernelLaunch->pNewMemObjArgList; @@ -513,7 +507,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = pUpdateKernelLaunch->pNewValueArgList; - std::vector CLArgs; for (uint32_t i = 0; i < NumMemobjArgs; i++) { const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg = ArgMemobjList[i]; @@ -537,45 +530,72 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( }; CLArgs.push_back(CLArg); } +} + +} // end anonymous namespace + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + + ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clUpdateMutableCommandsKHRCache, + cl_ext::UpdateMutableCommandsName, &clUpdateMutableCommandsKHR); + + if (!clUpdateMutableCommandsKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + if (!hCommandBuffer->IsFinalized || !hCommandBuffer->IsUpdatable) + return UR_RESULT_ERROR_INVALID_OPERATION; const cl_uint NewWorkDim = pUpdateKernelLaunch->newWorkDim; - cl_uint &CLWorkDim = hCommand->WorkDim; - if (NewWorkDim != 0 && NewWorkDim != CLWorkDim) { - // Limitation of the cl_khr_command_buffer_mutable_dispatch specification - // that it is an error to change the ND-Range size. - // https://github.com/KhronosGroup/OpenCL-Docs/issues/1057 - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + if (NewWorkDim != 0 && NewWorkDim != hCommand->WorkDim) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + // Find the CL execution info to update + std::vector CLExecInfos; + if (ur_result_t result = + updateKernelExecInfo(CLExecInfos, pUpdateKernelLaunch)) { + return result; } - // Update the ND-Range configuration of the kernel. - const size_t CopySize = sizeof(size_t) * CLWorkDim; + // Find the CL USM pointer arguments to the kernel to update + std::vector CLUSMArgs; + updateKernelPointerArgs(CLUSMArgs, pUpdateKernelLaunch); + + // Find the memory object and scalar arguments to the kernel to update + std::vector CLArgs; + + updateKernelArgs(CLArgs, pUpdateKernelLaunch); + + // Find the updated ND-Range configuration of the kernel. std::vector CLGlobalWorkOffset, CLGlobalWorkSize, CLLocalWorkSize; + cl_uint &CommandWorkDim = hCommand->WorkDim; + + // Lambda for N-Dimensional update + auto updateNDRange = [CommandWorkDim](std::vector &NDRange, + size_t *UpdatePtr) { + NDRange.resize(CommandWorkDim, 0); + const size_t CopySize = sizeof(size_t) * CommandWorkDim; + std::memcpy(NDRange.data(), UpdatePtr, CopySize); + }; if (auto GlobalWorkOffsetPtr = pUpdateKernelLaunch->pNewGlobalWorkOffset) { - CLGlobalWorkOffset.resize(CLWorkDim); - std::memcpy(CLGlobalWorkOffset.data(), GlobalWorkOffsetPtr, CopySize); - if (CLWorkDim < 3) { - const size_t ZeroSize = sizeof(size_t) * (3 - CLWorkDim); - std::memset(CLGlobalWorkOffset.data() + CLWorkDim, 0, ZeroSize); - } + updateNDRange(CLGlobalWorkOffset, GlobalWorkOffsetPtr); } if (auto GlobalWorkSizePtr = pUpdateKernelLaunch->pNewGlobalWorkSize) { - CLGlobalWorkSize.resize(CLWorkDim); - std::memcpy(CLGlobalWorkSize.data(), GlobalWorkSizePtr, CopySize); - if (CLWorkDim < 3) { - const size_t ZeroSize = sizeof(size_t) * (3 - CLWorkDim); - std::memset(CLGlobalWorkSize.data() + CLWorkDim, 0, ZeroSize); - } + updateNDRange(CLGlobalWorkSize, GlobalWorkSizePtr); } if (auto LocalWorkSizePtr = pUpdateKernelLaunch->pNewLocalWorkSize) { - CLLocalWorkSize.resize(CLWorkDim); - std::memcpy(CLLocalWorkSize.data(), LocalWorkSizePtr, CopySize); - if (CLWorkDim < 3) { - const size_t ZeroSize = sizeof(size_t) * (3 - CLWorkDim); - std::memset(CLLocalWorkSize.data() + CLWorkDim, 0, ZeroSize); - } + updateNDRange(CLLocalWorkSize, LocalWorkSizePtr); } cl_mutable_command_khr command = @@ -587,7 +607,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( static_cast(CLArgs.size()), // num_args static_cast(CLUSMArgs.size()), // num_svm_args static_cast(CLExecInfos.size()), // num_exec_infos - CLWorkDim, // work_dim + CommandWorkDim, // work_dim CLArgs.data(), // arg_list CLUSMArgs.data(), // arg_svm_list CLExecInfos.data(), // exec_info_list diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 3e25fe6044..bc658f5be6 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7916,6 +7916,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernelLaunch->newWorkDim` is different from the work-dim used on creation of `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 84ed7e61e4..37a819d7f4 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6687,6 +6687,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernelLaunch->newWorkDim` is different from the work-dim used on creation of `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX diff --git a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp index 6bb1b51568..e15571d32f 100644 --- a/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp @@ -18,8 +18,6 @@ struct BufferFillCommandTest sizeof(val) * global_size, nullptr, &buffer)); - // TODO - Enable single code path after https://github.com/oneapi-src/unified-runtime/pull/1176 - // is merged if (backend != UR_PLATFORM_BACKEND_OPENCL) { // First argument is buffer to fill ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffer)); diff --git a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp index 9f0ad92f14..2ea8eacda4 100644 --- a/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp @@ -29,8 +29,6 @@ struct BufferSaxpyKernelTest 0, nullptr, nullptr)); } - // TODO: Enable single code path once https://github.com/oneapi-src/unified-runtime/pull/1176 - // is merged if (backend != UR_PLATFORM_BACKEND_OPENCL) { // Index 0 is output buffer ASSERT_SUCCESS( diff --git a/test/conformance/exp_command_buffer/ndrange_update.cpp b/test/conformance/exp_command_buffer/ndrange_update.cpp index bd3781c4a4..03751c7e91 100644 --- a/test/conformance/exp_command_buffer/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/ndrange_update.cpp @@ -152,15 +152,9 @@ TEST_P(NDRangeUpdateTest, Update3D) { Validate(global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to 2, and update global size, local size, -// and global offset to new values. +// Update the kernel work dimensions to use 1 in the Z dimension, +// and update global size, local size, and global offset to new values. TEST_P(NDRangeUpdateTest, Update2D) { - if (backend == UR_PLATFORM_BACKEND_OPENCL) { - // OpenCL cl_khr_command_buffer_mutable_dispatch does not support - // updating the work dimension. - GTEST_SKIP(); - } - // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); @@ -183,7 +177,7 @@ TEST_P(NDRangeUpdateTest, Update2D) { 0, // numNewPointerArgs 0, // numNewValueArgs 0, // numNewExecInfos - 2, // newWorkDim + 3, // newWorkDim nullptr, // pNewMemObjArgList nullptr, // pNewPointerArgList nullptr, // pNewValueArgList @@ -208,15 +202,10 @@ TEST_P(NDRangeUpdateTest, Update2D) { Validate(new_global_size, new_local_size, new_global_offset); } -// Update the kernel work dimensions to 1, and check that previously -// set global size, local size, and global offset update accordingly. +// Update the kernel work dimensions to be 1 in Y & Z dimensions, and check +// that the previously set global size, local size, and global offset update +// accordingly. TEST_P(NDRangeUpdateTest, Update1D) { - if (backend == UR_PLATFORM_BACKEND_OPENCL) { - // OpenCL cl_khr_command_buffer_mutable_dispatch does not support - // updating the work dimension. - GTEST_SKIP(); - } - // Run command-buffer prior to update an verify output ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, nullptr, nullptr)); @@ -224,21 +213,24 @@ TEST_P(NDRangeUpdateTest, Update1D) { Validate(global_size, local_size, global_offset); // Set dimensions to 1 + std::array new_global_size = {9, 1, 1}; + std::array new_local_size = {3, 1, 1}; + std::array new_global_offset = {0, 0, 0}; ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - 0, // numNewMemObjArgs - 0, // numNewPointerArgs - 0, // numNewValueArgs - 0, // numNewExecInfos - 1, // newWorkDim - nullptr, // pNewMemObjArgList - nullptr, // pNewPointerArgList - nullptr, // pNewValueArgList - nullptr, // pNewExecInfoList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + 3, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + new_global_offset.data(), // pNewGlobalWorkOffset + new_global_size.data(), // pNewGlobalWorkSize + new_local_size.data(), // pNewLocalWorkSize }; // Reset output to remove old values which will no longer have a @@ -253,8 +245,31 @@ TEST_P(NDRangeUpdateTest, Update1D) { ASSERT_SUCCESS(urQueueFinish(queue)); // Verify that update occurred correctly - std::array new_global_size = {global_size[0], 1, 1}; - std::array new_local_size = {local_size[0], 1, 1}; - std::array new_global_offset = {global_offset[0], 0, 0}; Validate(new_global_size, new_local_size, new_global_offset); } + +// Test error code is returned if work dimension parameter changes +TEST_P(NDRangeUpdateTest, Invalid) { + const size_t new_work_dim = n_dimensions - 1; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + 0, // numNewExecInfos + new_work_dim, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewExecInfoList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update command to command-buffer to use different work dim + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +}