From b0057ee85f2b62825741e469011bf0bdeb034b9a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 12 Mar 2024 15:14:54 -0700 Subject: [PATCH] support for mutable dispatch asserts (#106) * initial support for mutable dispatch assertions (untested) * update to latest version of the spec * check command buffer mutable dispatch asserts also * add a sample demonstrating use of mutable dispatch asserts * update sample documentation * minor document formatting fix --- layers/10_cmdbufemu/emulate.cpp | 360 ++++++++++++------ samples/13_mutablecommandbuffers/main.cpp | 4 + .../CMakeLists.txt | 11 + .../15_mutablecommandbufferasserts/README.md | 30 ++ .../15_mutablecommandbufferasserts/main.cpp | 300 +++++++++++++++ samples/CMakeLists.txt | 1 + 6 files changed, 600 insertions(+), 106 deletions(-) create mode 100644 samples/15_mutablecommandbufferasserts/CMakeLists.txt create mode 100644 samples/15_mutablecommandbufferasserts/README.md create mode 100644 samples/15_mutablecommandbufferasserts/main.cpp diff --git a/layers/10_cmdbufemu/emulate.cpp b/layers/10_cmdbufemu/emulate.cpp index e465ab7..1da1792 100644 --- a/layers/10_cmdbufemu/emulate.cpp +++ b/layers/10_cmdbufemu/emulate.cpp @@ -22,7 +22,7 @@ static constexpr cl_version version_cl_khr_command_buffer = CL_MAKE_VERSION(0, 9, 4); static constexpr cl_version version_cl_khr_command_buffer_mutable_dispatch = - CL_MAKE_VERSION(0, 9, 0); + CL_MAKE_VERSION(0, 9, 1); SLayerContext& getLayerContext(void) { @@ -41,6 +41,13 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps = CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR; +#if !defined(CL_MUTABLE_DISPATCH_ASSERTS_KHR) +typedef cl_bitfield cl_mutable_dispatch_asserts_khr; +#define CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 +#define CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B8 +#define CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (1 << 0) +#endif // !defined(CL_MUTABLE_DISPATCH_ASSERTS_KHR) + #endif // defined(cl_khr_command_buffer_mutable_dispatch) typedef struct _cl_mutable_command_khr @@ -812,95 +819,7 @@ struct NDRangeKernel : Command const size_t* global_work_offset, const size_t* global_work_size, const size_t* local_work_size, - cl_int& errorCode) - { - errorCode = CL_SUCCESS; - - ptrdiff_t numProperties = 0; -#if defined(cl_khr_command_buffer_mutable_dispatch) - cl_mutable_dispatch_fields_khr mutableFields = g_MutableDispatchCaps; -#endif - - if( properties ) - { - const cl_ndrange_kernel_command_properties_khr* check = properties; - bool found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR = false; - while( errorCode == CL_SUCCESS && check[0] != 0 ) - { - cl_int property = (cl_int)check[0]; - switch( property ) - { -#if defined(cl_khr_command_buffer_mutable_dispatch) - case CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR: - if( found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR ) - { - errorCode = CL_INVALID_VALUE; - return nullptr; - } - else - { - found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR = true; - mutableFields = ((const cl_mutable_dispatch_fields_khr*)(check + 1))[0]; - check += 2; - } - break; -#endif - default: - errorCode = CL_INVALID_VALUE; - return nullptr; - } - } - numProperties = check - properties + 1; - } - - auto command = std::unique_ptr( - new NDRangeKernel(cmdbuf, queue)); - - command->original_kernel = kernel; - command->kernel = g_pNextDispatch->clCloneKernel(kernel, NULL); - command->work_dim = work_dim; - -#if defined(cl_khr_command_buffer_mutable_dispatch) - command->mutableFields = mutableFields; -#endif - - command->properties.reserve(numProperties); - command->properties.insert( - command->properties.begin(), - properties, - properties + numProperties ); - - if( global_work_offset ) - { - command->global_work_offset.reserve(work_dim); - command->global_work_offset.insert( - command->global_work_offset.begin(), - global_work_offset, - global_work_offset + work_dim); - } - - if( global_work_size ) - { - command->global_work_size.reserve(work_dim); - command->global_work_size.insert( - command->global_work_size.begin(), - global_work_size, - global_work_size + work_dim); - } - - if( local_work_size ) - { - command->local_work_size.reserve(work_dim); - command->local_work_size.insert( - command->local_work_size.begin(), - local_work_size, - local_work_size + work_dim); - } - - g_pNextDispatch->clRetainKernel(command->original_kernel); - - return command; - } + cl_int& errorCode); ~NDRangeKernel() { @@ -1015,9 +934,10 @@ struct NDRangeKernel : Command return CL_INVALID_VALUE; } - cl_int mutate( const cl_mutable_dispatch_config_khr* dispatchConfig ) + cl_int mutate( + const cl_mutable_dispatch_asserts_khr mutableAssertsCmdBuf, + const cl_mutable_dispatch_config_khr* dispatchConfig ) { - //CL_INVALID_OPERATION if values of local_work_size and/or global_work_size result in an increase to the number of work-groups in the ND-range. //CL_INVALID_OPERATION if the values of local_work_size and/or global_work_size result in a change to work-group uniformity. if( dispatchConfig->work_dim != 0 && dispatchConfig->work_dim != work_dim ) { @@ -1100,6 +1020,36 @@ struct NDRangeKernel : Command } } + if( mutableAssertsCmdBuf & CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR || + mutableAsserts & CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR ) + { + const size_t* check_global_work_size = + dispatchConfig->global_work_size ? + dispatchConfig->global_work_size : + global_work_size.size() > 0 ? + global_work_size.data() : + nullptr; + const size_t* check_local_work_size = + dispatchConfig->local_work_size ? + dispatchConfig->local_work_size : + local_work_size.size() > 0 ? + local_work_size.data() : + nullptr; + if( check_local_work_size == nullptr ) + { + return CL_INVALID_WORK_GROUP_SIZE; + } + + size_t newNumWorkGroups = getNumWorkGroups( + work_dim, + check_global_work_size, + check_local_work_size ); + if( newNumWorkGroups > numWorkGroups ) + { + return CL_INVALID_WORK_GROUP_SIZE; + } + } + if( dispatchConfig->global_work_offset ) { global_work_offset.assign( @@ -1148,7 +1098,9 @@ struct NDRangeKernel : Command cl_uint work_dim = 0; #if defined(cl_khr_command_buffer_mutable_dispatch) cl_mutable_dispatch_fields_khr mutableFields = 0; -#endif + cl_mutable_dispatch_asserts_khr mutableAsserts = 0; + size_t numWorkGroups = 0; +#endif // defined(cl_khr_command_buffer_mutable_dispatch) std::vector properties; std::vector global_work_offset; std::vector global_work_size; @@ -1159,6 +1111,29 @@ struct NDRangeKernel : Command cl_command_buffer_khr cmdbuf, cl_command_queue queue) : Command(cmdbuf, queue, CL_COMMAND_NDRANGE_KERNEL) {}; + + static size_t getNumWorkGroups( + cl_uint work_dim, + const size_t* global_work_size, + const size_t* local_work_size ) + { + if( work_dim == 0 || + global_work_size == nullptr || + local_work_size == nullptr ) + { + return 1; + } + + size_t count = 1; + for( cl_uint i = 0; i < work_dim; i++ ) + { + size_t gws = global_work_size[i]; + size_t lws = local_work_size[i]; + count *= (gws + lws - 1) / lws; + } + + return count; + } }; typedef struct _cl_command_buffer_khr @@ -1174,6 +1149,9 @@ typedef struct _cl_command_buffer_khr ptrdiff_t numProperties = 0; cl_command_buffer_flags_khr flags = 0; +#if defined(cl_khr_command_buffer_mutable_dispatch) + cl_mutable_dispatch_asserts_khr mutableDispatchAsserts = 0; +#endif // defined(cl_khr_command_buffer_mutable_dispatch) if( num_queues != 1 || queues == NULL ) { @@ -1183,6 +1161,7 @@ typedef struct _cl_command_buffer_khr { const cl_command_buffer_properties_khr* check = properties; bool found_CL_COMMAND_BUFFER_FLAGS_KHR = false; + bool found_CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR = false; while( errorCode == CL_SUCCESS && check[0] != 0 ) { cl_int property = (cl_int)check[0]; @@ -1201,6 +1180,20 @@ typedef struct _cl_command_buffer_khr check += 2; } break; +#if defined(cl_khr_command_buffer_mutable_dispatch) + case CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR: + if( found_CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR ) + { + errorCode = CL_INVALID_VALUE; + } + else + { + found_CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR = true; + mutableDispatchAsserts = ((const cl_mutable_dispatch_asserts_khr*)(check + 1))[0]; + check += 2; + } + break; +#endif // defined(cl_khr_command_buffer_mutable_dispatch) default: errorCode = CL_INVALID_VALUE; break; @@ -1213,7 +1206,11 @@ typedef struct _cl_command_buffer_khr errcode_ret[0] = errorCode; } if( errorCode == CL_SUCCESS) { - cmdbuf = new _cl_command_buffer_khr(flags); + cmdbuf = new _cl_command_buffer_khr(flags +#if defined(cl_khr_command_buffer_mutable_dispatch) + , mutableDispatchAsserts +#endif // defined(cl_khr_command_buffer_mutable_dispatch) + ); cmdbuf->Queues.reserve(num_queues); cmdbuf->Queues.insert( cmdbuf->Queues.begin(), @@ -1281,6 +1278,13 @@ typedef struct _cl_command_buffer_khr return Queues[0]; } +#if defined(cl_khr_command_buffer_mutable_dispatch) + cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts() const + { + return MutableDispatchAsserts; + } +#endif + cl_int getInfo( cl_command_buffer_info_khr param_name, size_t param_value_size, @@ -1362,7 +1366,7 @@ typedef struct _cl_command_buffer_khr ptr ); } break; -#endif +#endif // defined(CL_COMMAND_BUFFER_CONTEXT_KHR) default: break; } @@ -1550,6 +1554,7 @@ typedef struct _cl_command_buffer_khr } if( cl_int errorCode = ((NDRangeKernel*)dispatchConfig->command)->mutate( + MutableDispatchAsserts, dispatchConfig ) ) { return errorCode; @@ -1573,24 +1578,35 @@ typedef struct _cl_command_buffer_khr std::vector Properties; cl_command_buffer_state_khr State; cl_command_buffer_flags_khr Flags; +#if defined(cl_khr_command_buffer_mutable_dispatch) + cl_mutable_dispatch_asserts_khr MutableDispatchAsserts; +#endif // defined(cl_khr_command_buffer_mutable_dispatch) std::atomic RefCount; std::vector> Commands; std::atomic NextSyncPoint; - _cl_command_buffer_khr(cl_command_buffer_flags_khr flags) : + _cl_command_buffer_khr( + cl_command_buffer_flags_khr flags +#if defined(cl_khr_command_buffer_mutable_dispatch) + , cl_mutable_dispatch_asserts_khr mutableDispatchAsserts +#endif // defined(cl_khr_command_buffer_mutable_dispatch) + ) : Magic(cMagic), State(CL_COMMAND_BUFFER_STATE_RECORDING_KHR), Flags(flags), +#if defined(cl_khr_command_buffer_mutable_dispatch) + MutableDispatchAsserts(mutableDispatchAsserts), +#endif // defined(cl_khr_command_buffer_mutable_dispatch) RefCount(1), NextSyncPoint(1) {} } CommandBuffer; /////////////////////////////////////////////////////////////////////////////// // -// We need to define the mutable command constructor separately and after the -// definition of a command buffer because we will call into the command buffer -// to get the queue if the passed-in queue is NULL. +// These functions need to be defined separately and after the definition of a +// command buffer because they call into the command buffer. + _cl_mutable_command_khr::_cl_mutable_command_khr( cl_command_buffer_khr cmdbuf, cl_command_queue queue, @@ -1600,6 +1616,138 @@ _cl_mutable_command_khr::_cl_mutable_command_khr( CmdBuf(cmdbuf), Queue(queue ? queue : cmdbuf->getQueue()) {} +std::unique_ptr NDRangeKernel::create( + const cl_ndrange_kernel_command_properties_khr* properties, + cl_command_buffer_khr cmdbuf, + cl_command_queue queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_int& errorCode) +{ + errorCode = CL_SUCCESS; + + ptrdiff_t numProperties = 0; +#if defined(cl_khr_command_buffer_mutable_dispatch) + cl_mutable_dispatch_fields_khr mutableFields = g_MutableDispatchCaps; + cl_mutable_dispatch_asserts_khr mutableAsserts = 0; +#endif // defined(cl_khr_command_buffer_mutable_dispatch) + + if( properties ) + { + const cl_ndrange_kernel_command_properties_khr* check = properties; + bool found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR = false; + bool found_CL_MUTABLE_DISPATCH_ASSERTS_KHR = false; + while( errorCode == CL_SUCCESS && check[0] != 0 ) + { + cl_int property = (cl_int)check[0]; + switch( property ) + { +#if defined(cl_khr_command_buffer_mutable_dispatch) + case CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR: + if( found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR ) + { + errorCode = CL_INVALID_VALUE; + return nullptr; + } + else + { + found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR = true; + mutableFields = ((const cl_mutable_dispatch_fields_khr*)(check + 1))[0]; + check += 2; + } + break; + case CL_MUTABLE_DISPATCH_ASSERTS_KHR: + if( found_CL_MUTABLE_DISPATCH_ASSERTS_KHR ) + { + errorCode = CL_INVALID_VALUE; + return nullptr; + } + else + { + found_CL_MUTABLE_DISPATCH_ASSERTS_KHR = true; + mutableAsserts = ((const cl_mutable_dispatch_asserts_khr*)(check + 1))[0]; + check += 2; + } + break; +#endif // defined(cl_khr_command_buffer_mutable_dispatch) + default: + errorCode = CL_INVALID_VALUE; + return nullptr; + } + } + numProperties = check - properties + 1; + } + +#if defined(cl_khr_command_buffer_mutable_dispatch) + if( local_work_size == nullptr ) + { + const auto mutableAssertsCmdBuf = cmdbuf->getMutableDispatchAsserts(); + if( mutableAssertsCmdBuf & CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR || + mutableAsserts & CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR ) + { + errorCode = CL_INVALID_WORK_GROUP_SIZE; + return nullptr; + } + } +#endif // defined(cl_khr_command_buffer_mutable_dispatch) + + auto command = std::unique_ptr( + new NDRangeKernel(cmdbuf, queue)); + + command->original_kernel = kernel; + command->kernel = g_pNextDispatch->clCloneKernel(kernel, NULL); + command->work_dim = work_dim; + +#if defined(cl_khr_command_buffer_mutable_dispatch) + command->mutableFields = mutableFields; + command->mutableAsserts = mutableAsserts; + command->numWorkGroups = getNumWorkGroups( + work_dim, + global_work_size, + local_work_size ); +#endif // defined(cl_khr_command_buffer_mutable_dispatch) + + command->properties.reserve(numProperties); + command->properties.insert( + command->properties.begin(), + properties, + properties + numProperties ); + + if( global_work_offset ) + { + command->global_work_offset.reserve(work_dim); + command->global_work_offset.insert( + command->global_work_offset.begin(), + global_work_offset, + global_work_offset + work_dim); + } + + if( global_work_size ) + { + command->global_work_size.reserve(work_dim); + command->global_work_size.insert( + command->global_work_size.begin(), + global_work_size, + global_work_size + work_dim); + } + + if( local_work_size ) + { + command->local_work_size.reserve(work_dim); + command->local_work_size.insert( + command->local_work_size.begin(), + local_work_size, + local_work_size + work_dim); + } + + g_pNextDispatch->clRetainKernel(command->original_kernel); + + return command; +} + /////////////////////////////////////////////////////////////////////////////// // // cl_khr_command_buffer @@ -2230,7 +2378,7 @@ cl_int CL_API_CALL clCommandNDRangeKernelKHR_EMU( { return CL_INVALID_VALUE; } -#endif // cl_khr_command_buffer_mutable_dispatch +#endif // !defined(cl_khr_command_buffer_mutable_dispatch) cl_int errorCode = CL_SUCCESS; auto command = NDRangeKernel::create( @@ -2300,7 +2448,7 @@ cl_command_buffer_khr CL_API_CALL clRemapCommandBufferKHR_EMU( return nullptr; } -#endif +#endif // defined(cl_khr_command_buffer_multi_device) #if defined(cl_khr_command_buffer_mutable_dispatch) @@ -2402,7 +2550,7 @@ bool clGetDeviceInfo_override( #if defined(cl_khr_command_buffer_mutable_dispatch) newExtensions += ' '; newExtensions += CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME; -#endif +#endif // defined(cl_khr_command_buffer_mutable_dispatch) std::string oldExtensions(deviceExtensions.data()); @@ -2502,7 +2650,7 @@ bool clGetDeviceInfo_override( extension.version = version_cl_khr_command_buffer_mutable_dispatch; } -#endif +#endif // defined(cl_khr_command_buffer_mutable_dispatch) auto ptr = (cl_name_version*)param_value; cl_int errorCode = writeVectorToMemory( @@ -2602,7 +2750,7 @@ bool clGetDeviceInfo_override( return true; } break; -#endif +#endif // defined(cl_khr_command_buffer_mutable_dispatch) default: break; } @@ -2734,7 +2882,7 @@ bool clGetPlatformInfo_override( #if defined(cl_khr_command_buffer_mutable_dispatch) newExtensions += ' '; newExtensions += CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME; -#endif +#endif // defined(cl_khr_command_buffer_mutable_dispatch) std::string oldExtensions(platformExtensions.data()); @@ -2834,7 +2982,7 @@ bool clGetPlatformInfo_override( extension.version = version_cl_khr_command_buffer_mutable_dispatch; } -#endif +#endif // defined(cl_khr_command_buffer_mutable_dispatch) auto ptr = (cl_name_version*)param_value; cl_int errorCode = writeVectorToMemory( diff --git a/samples/13_mutablecommandbuffers/main.cpp b/samples/13_mutablecommandbuffers/main.cpp index a307298..6139754 100644 --- a/samples/13_mutablecommandbuffers/main.cpp +++ b/samples/13_mutablecommandbuffers/main.cpp @@ -173,6 +173,10 @@ int main( NULL ); printf("\tMutable Dispatch Capabilities:\n"); PrintMutableDispatchCapabilities(mutableCaps); + if (!(mutableCaps & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR)) { + printf("Device does not support modifying the global work size, exiting.\n"); + return -1; + } cl::Context context{devices[deviceIndex]}; cl::CommandQueue commandQueue{context, devices[deviceIndex]}; diff --git a/samples/15_mutablecommandbufferasserts/CMakeLists.txt b/samples/15_mutablecommandbufferasserts/CMakeLists.txt new file mode 100644 index 0000000..f8e3978 --- /dev/null +++ b/samples/15_mutablecommandbufferasserts/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright (c) 2022-2024 Ben Ashbaugh +# +# SPDX-License-Identifier: MIT + +add_opencl_sample( + TEST + NUMBER 12 + TARGET mutablecommandbufferasserts + VERSION 120 + SOURCES main.cpp + LIBS OpenCLExt) diff --git a/samples/15_mutablecommandbufferasserts/README.md b/samples/15_mutablecommandbufferasserts/README.md new file mode 100644 index 0000000..12ea5a4 --- /dev/null +++ b/samples/15_mutablecommandbufferasserts/README.md @@ -0,0 +1,30 @@ +# mutablecommandbufferasserts + +## Sample Purpose + +This is an intermediate-level sample that demonstrates how to pass assertions guaranteeing certain behavior when modifying command buffers using the OpenCL extension [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch). +As of this writing, `cl_khr_command_buffer_mutable_dispatch` is a provisional extension. +This sample uses the functionality described in v0.9.1 of the extension. + +This is an optional extension and some devices may not support `cl_khr_command_buffer_mutable_dispatch`, but the sample may still run using the [cl_khr_command_buffer emulation layer](../../layers/10_cmdbufemu). + +This sample requires the OpenCL Extension Loader to get the extension APIs for command buffers. + +## Key APIs and Concepts + +This sample demonstrates how to pass mutable dispatch assertions when command buffer is created or when an ND-range kernel command is recorded into a command buffer. + +``` +CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR +CL_MUTABLE_DISPATCH_ASSERTS_KHR +CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR +``` + +## Command Line Options + +| Option | Default Value | Description | +|:--|:-:|:--| +| `-d ` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on. +| `-p ` | 0 | Specify the index of the OpenCL platform to execute the sample on. +| `--noCmdBufAssert` | N/A | Do not pass an assertion when the command buffer is created (`CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR`) +| `--noCmdAssert` | N/A | Do not pass an assertion when the command is recorded into the command buffer (`CL_MUTABLE_DISPATCH_ASSERTS_KHR`) diff --git a/samples/15_mutablecommandbufferasserts/main.cpp b/samples/15_mutablecommandbufferasserts/main.cpp new file mode 100644 index 0000000..7c3ad21 --- /dev/null +++ b/samples/15_mutablecommandbufferasserts/main.cpp @@ -0,0 +1,300 @@ +/* +// Copyright (c) 2022-2024 Ben Ashbaugh +// +// SPDX-License-Identifier: MIT +*/ + +#include + +#include + +#include + +#include "util.hpp" + +#if defined(cl_khr_command_buffer_mutable_dispatch) + +#if !defined(CL_MUTABLE_DISPATCH_ASSERTS_KHR) +typedef cl_bitfield cl_mutable_dispatch_asserts_khr; +#define CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B7 +#define CL_MUTABLE_DISPATCH_ASSERTS_KHR 0x12B8 +#define CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (1 << 0) +#endif // !defined(CL_MUTABLE_DISPATCH_ASSERTS_KHR) + +const size_t gwx = 1024; +const size_t lwx = 16; + +static const char kernelString[] = R"CLC( +kernel void CopyBuffer( global uint* dst, global uint* src ) +{ + uint id = get_global_id(0); + dst[id] = src[id]; +} +)CLC"; + +int main( + int argc, + char** argv ) +{ + int platformIndex = 0; + int deviceIndex = 0; + bool noCmdBufAssert = false; + bool noCmdAssert = false; + + { + popl::OptionParser op("Supported Options"); + op.add>("p", "platform", "Platform Index", platformIndex, &platformIndex); + op.add>("d", "device", "Device Index", deviceIndex, &deviceIndex); + op.add("", "noCmdBufAssert", "Skip Command Buffer Assert", &noCmdBufAssert); + op.add("", "noCmdAssert", "Skip Command Assert", &noCmdAssert); + + bool printUsage = false; + try { + op.parse(argc, argv); + } catch (std::exception& e) { + fprintf(stderr, "Error: %s\n\n", e.what()); + printUsage = true; + } + + if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) { + fprintf(stderr, + "Usage: mutablecommandbufferasserts [options]\n" + "%s", op.help().c_str()); + return -1; + } + } + + std::vector platforms; + cl::Platform::get(&platforms); + + printf("Running on platform: %s\n", + platforms[platformIndex].getInfo().c_str() ); + + std::vector devices; + platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices); + + printf("Running on device: %s\n", + devices[deviceIndex].getInfo().c_str() ); + + // device queries: + + bool has_cl_khr_command_buffer = + checkDeviceForExtension(devices[deviceIndex], CL_KHR_COMMAND_BUFFER_EXTENSION_NAME); + if (has_cl_khr_command_buffer) { + printf("Device supports " CL_KHR_COMMAND_BUFFER_EXTENSION_NAME ".\n"); + } else { + printf("Device does not support " CL_KHR_COMMAND_BUFFER_EXTENSION_NAME ", exiting.\n"); + return -1; + } + bool has_cl_khr_command_buffer_mutable_dispatch = + checkDeviceForExtension(devices[deviceIndex], CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME); + if (has_cl_khr_command_buffer_mutable_dispatch) { + printf("Device supports " CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME ".\n"); + } else { + printf("Device does not support " CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME ", exiting.\n"); + return -1; + } + + cl_mutable_dispatch_fields_khr mutableCaps = 0; + clGetDeviceInfo( + devices[deviceIndex](), + CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutableCaps), + &mutableCaps, + NULL ); + if (!(mutableCaps & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR)) { + printf("Device does not support modifying the global work size, exiting.\n"); + return -1; + } + + printf("Adding Command Buffer Assert? %s\n", noCmdBufAssert ? "No" : "Yes"); + printf("Adding Command Assert? %s\n", noCmdAssert ? "No" : "Yes"); + + cl::Context context{devices[deviceIndex]}; + cl::CommandQueue commandQueue{context, devices[deviceIndex]}; + + cl::Program program{ context, kernelString }; + program.build(); + cl::Kernel kernel = cl::Kernel{ program, "CopyBuffer" }; + + cl::Buffer deviceMemSrc = cl::Buffer{ + context, + CL_MEM_ALLOC_HOST_PTR, + gwx * sizeof( cl_uint ) }; + + cl::Buffer deviceMemDst = cl::Buffer{ + context, + CL_MEM_ALLOC_HOST_PTR, + gwx * sizeof( cl_uint ) }; + + // initialization + { + cl_uint* pSrc = (cl_uint*)commandQueue.enqueueMapBuffer( + deviceMemSrc, + CL_TRUE, + CL_MAP_WRITE_INVALIDATE_REGION, + 0, + gwx * sizeof(cl_uint) ); + + for( size_t i = 0; i < gwx; i++ ) + { + pSrc[i] = (cl_uint)(i); + } + + commandQueue.enqueueUnmapMemObject( + deviceMemSrc, + pSrc ); + } + + const cl_command_buffer_properties_khr cbprops[] = { + CL_COMMAND_BUFFER_FLAGS_KHR, + CL_COMMAND_BUFFER_MUTABLE_KHR, + CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR, + noCmdBufAssert + ? 0 + : (cl_command_buffer_properties_khr) + CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR, + 0, + }; + cl_command_buffer_khr cmdbuf = clCreateCommandBufferKHR( + 1, + &commandQueue(), + cbprops, + NULL); + + kernel.setArg(0, deviceMemDst); + kernel.setArg(1, deviceMemSrc); + const cl_ndrange_kernel_command_properties_khr cmdprops[] = { + CL_MUTABLE_DISPATCH_ASSERTS_KHR, + noCmdAssert + ? 0 + : (cl_ndrange_kernel_command_properties_khr) + CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR, + 0, + }; + const size_t gwx_x2 = gwx * 2; + cl_sync_point_khr sync_point; + cl_mutable_command_khr command; + clCommandNDRangeKernelKHR( + cmdbuf, // command_buffer + NULL, // command_queue - note NULL! + cmdprops, // properties + kernel(), // kernel + 1, // work_dim + NULL, // global_work_offset + &gwx_x2, // global_work_size + &lwx, // local_work_size + 0, // num_sync_points_in_wait_list + NULL, // sync_point_wait_list + &sync_point,// sync_point + &command); // mutable_handle + + clFinalizeCommandBufferKHR(cmdbuf); + + // mutate the command buffer, adding work-groups. + // This should generate an error with mutable dispatch asserts. + { + const size_t gwx_x4 = gwx * 4; + cl_mutable_dispatch_config_khr dispatchConfig = {}; + dispatchConfig.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR; + dispatchConfig.command = command; + dispatchConfig.global_work_size = &gwx_x4; + + cl_mutable_base_config_khr baseConfig = {}; + baseConfig.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR; + baseConfig.num_mutable_dispatch = 1; + baseConfig.mutable_dispatch_list = &dispatchConfig; + + cl_int check = clUpdateMutableCommandsKHR( + cmdbuf, + &baseConfig ); + printf("clUpdateMutableCommandsKHR() to increase work-groups returned %s.\n", + check == CL_SUCCESS ? "SUCCESS" : "an ERROR"); + } + + // mutate the command buffer, reducing work-groups. + // This should not generate an error even with mutable dispatch asserts. + { + cl_mutable_dispatch_config_khr dispatchConfig = {}; + dispatchConfig.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR; + dispatchConfig.command = command; + dispatchConfig.global_work_size = &gwx; + + cl_mutable_base_config_khr baseConfig = {}; + baseConfig.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR; + baseConfig.num_mutable_dispatch = 1; + baseConfig.mutable_dispatch_list = &dispatchConfig; + + cl_int check = clUpdateMutableCommandsKHR( + cmdbuf, + &baseConfig ); + printf("clUpdateMutableCommandsKHR() to reduce work-groups returned %s.\n", + check == CL_SUCCESS ? "SUCCESS" : "an ERROR"); + } + + clEnqueueCommandBufferKHR( + 0, + NULL, + cmdbuf, + 0, + NULL, + NULL); + + // verification + { + const cl_uint* pDst = (const cl_uint*)commandQueue.enqueueMapBuffer( + deviceMemDst, + CL_TRUE, + CL_MAP_READ, + 0, + gwx * sizeof(cl_uint) ); + + unsigned int mismatches = 0; + + for( size_t i = 0; i < gwx; i++ ) + { + if( pDst[i] != i ) + { + if( mismatches < 16 ) + { + fprintf(stderr, "MisMatch! dst[%d] == %08X, want %08X\n", + (unsigned int)i, + pDst[i], + (unsigned int)i ); + } + mismatches++; + } + } + + if( mismatches ) + { + fprintf(stderr, "Error: Found %d mismatches / %d values!!!\n", + mismatches, + (unsigned int)gwx ); + } + else + { + printf("Success.\n"); + } + + commandQueue.enqueueUnmapMemObject( + deviceMemDst, + (void*)pDst ); + } + + clReleaseCommandBufferKHR(cmdbuf); + + return 0; +} + +#else + +#pragma message("mutablecommandbuffers: cl_khr_command_buffer_mutable_dispatch not found. Please update your OpenCL headers.") + +int main() +{ + printf("mutablecommandbuffers: cl_khr_command_buffer_mutable_dispatch not found. Please update your OpenCL headers.\n"); + return 0; +}; + +#endif diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index b327b34..c66bf41 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -87,4 +87,5 @@ if(BUILD_EXTENSION_SAMPLES) add_subdirectory( 12_commandbufferspp ) add_subdirectory( 13_mutablecommandbuffers ) add_subdirectory( 14_ooqcommandbuffers ) + add_subdirectory( 15_mutablecommandbufferasserts ) endif()