From b0057ee85f2b62825741e469011bf0bdeb034b9a Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Tue, 12 Mar 2024 15:14:54 -0700
Subject: [PATCH] support for mutable dispatch asserts (#106)

* initial support for mutable dispatch assertions (untested)

* update to latest version of the spec

* check command buffer mutable dispatch asserts also

* add a sample demonstrating use of mutable dispatch asserts

* update sample documentation

* minor document formatting fix
---
 layers/10_cmdbufemu/emulate.cpp               | 360 ++++++++++++------
 samples/13_mutablecommandbuffers/main.cpp     |   4 +
 .../CMakeLists.txt                            |  11 +
 .../15_mutablecommandbufferasserts/README.md  |  30 ++
 .../15_mutablecommandbufferasserts/main.cpp   | 300 +++++++++++++++
 samples/CMakeLists.txt                        |   1 +
 6 files changed, 600 insertions(+), 106 deletions(-)
 create mode 100644 samples/15_mutablecommandbufferasserts/CMakeLists.txt
 create mode 100644 samples/15_mutablecommandbufferasserts/README.md
 create mode 100644 samples/15_mutablecommandbufferasserts/main.cpp
diff --git a/layers/10_cmdbufemu/emulate.cpp b/layers/10_cmdbufemu/emulate.cpp
index e465ab7..1da1792 100644
--- a/layers/10_cmdbufemu/emulate.cpp
+++ b/layers/10_cmdbufemu/emulate.cpp
@@ -22,7 +22,7 @@
 static constexpr cl_version version_cl_khr_command_buffer =
     CL_MAKE_VERSION(0, 9, 4);
 static constexpr cl_version version_cl_khr_command_buffer_mutable_dispatch =
-    CL_MAKE_VERSION(0, 9, 0);
+    CL_MAKE_VERSION(0, 9, 1);
 
 SLayerContext& getLayerContext(void)
 {
@@ -41,6 +41,13 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
     CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
     CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
 
+#if !defined(CL_MUTABLE_DISPATCH_ASSERTS_KHR)
+typedef cl_bitfield         cl_mutable_dispatch_asserts_khr;
+#define CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR  0x12B7
+#define CL_MUTABLE_DISPATCH_ASSERTS_KHR                 0x12B8
+#define CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (1 << 0)
+#endif // !defined(CL_MUTABLE_DISPATCH_ASSERTS_KHR)
+
 #endif // defined(cl_khr_command_buffer_mutable_dispatch)
 
 typedef struct _cl_mutable_command_khr
@@ -812,95 +819,7 @@ struct NDRangeKernel : Command
         const size_t* global_work_offset,
         const size_t* global_work_size,
         const size_t* local_work_size,
-        cl_int& errorCode)
-    {
-        errorCode = CL_SUCCESS;
-
-        ptrdiff_t numProperties = 0;
-#if defined(cl_khr_command_buffer_mutable_dispatch)
-        cl_mutable_dispatch_fields_khr mutableFields = g_MutableDispatchCaps;
-#endif
-
-        if( properties )
-        {
-            const cl_ndrange_kernel_command_properties_khr* check = properties;
-            bool found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR = false;
-            while( errorCode == CL_SUCCESS && check[0] != 0 )
-            {
-                cl_int  property = (cl_int)check[0];
-                switch( property )
-                {
-#if defined(cl_khr_command_buffer_mutable_dispatch)
-                case CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR:
-                    if( found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR )
-                    {
-                        errorCode = CL_INVALID_VALUE;
-                        return nullptr;
-                    }
-                    else
-                    {
-                        found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR = true;
-                        mutableFields = ((const cl_mutable_dispatch_fields_khr*)(check + 1))[0];
-                        check += 2;
-                    }
-                    break;
-#endif
-                default:
-                    errorCode = CL_INVALID_VALUE;
-                    return nullptr;
-                }
-            }
-            numProperties = check - properties + 1;
-        }
-
-        auto command = std::unique_ptr<NDRangeKernel>(
-            new NDRangeKernel(cmdbuf, queue));
-
-        command->original_kernel = kernel;
-        command->kernel = g_pNextDispatch->clCloneKernel(kernel, NULL);
-        command->work_dim = work_dim;
-
-#if defined(cl_khr_command_buffer_mutable_dispatch)
-        command->mutableFields = mutableFields;
-#endif
-
-        command->properties.reserve(numProperties);
-        command->properties.insert(
-            command->properties.begin(),
-            properties,
-            properties + numProperties );
-
-        if( global_work_offset )
-        {
-            command->global_work_offset.reserve(work_dim);
-            command->global_work_offset.insert(
-                command->global_work_offset.begin(),
-                global_work_offset,
-                global_work_offset + work_dim);
-        }
-
-        if( global_work_size )
-        {
-            command->global_work_size.reserve(work_dim);
-            command->global_work_size.insert(
-                command->global_work_size.begin(),
-                global_work_size,
-                global_work_size + work_dim);
-        }
-
-        if( local_work_size )
-        {
-            command->local_work_size.reserve(work_dim);
-            command->local_work_size.insert(
-                command->local_work_size.begin(),
-                local_work_size,
-                local_work_size + work_dim);
-        }
-
-        g_pNextDispatch->clRetainKernel(command->original_kernel);
-
-        return command;
-    }
+        cl_int& errorCode);
 
     ~NDRangeKernel()
     {
@@ -1015,9 +934,10 @@ struct NDRangeKernel : Command
         return CL_INVALID_VALUE;
     }
 
-    cl_int  mutate( const cl_mutable_dispatch_config_khr* dispatchConfig )
+    cl_int  mutate(
+                const cl_mutable_dispatch_asserts_khr mutableAssertsCmdBuf,
+                const cl_mutable_dispatch_config_khr* dispatchConfig )
     {
-        //CL_INVALID_OPERATION if values of local_work_size and/or global_work_size result in an increase to the number of work-groups in the ND-range.
         //CL_INVALID_OPERATION if the values of local_work_size and/or global_work_size result in a change to work-group uniformity.
         if( dispatchConfig->work_dim != 0 && dispatchConfig->work_dim != work_dim )
         {
@@ -1100,6 +1020,36 @@ struct NDRangeKernel : Command
             }
         }
 
+        if( mutableAssertsCmdBuf & CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR ||
+            mutableAsserts & CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR )
+        {
+            const size_t* check_global_work_size =
+                dispatchConfig->global_work_size ?
+                dispatchConfig->global_work_size :
+                global_work_size.size() > 0 ?
+                global_work_size.data() :
+                nullptr;
+            const size_t* check_local_work_size =
+                dispatchConfig->local_work_size ?
+                dispatchConfig->local_work_size :
+                local_work_size.size() > 0 ?
+                local_work_size.data() :
+                nullptr;
+            if( check_local_work_size == nullptr )
+            {
+                return CL_INVALID_WORK_GROUP_SIZE;
+            }
+
+            size_t newNumWorkGroups = getNumWorkGroups(
+                work_dim,
+                check_global_work_size,
+                check_local_work_size );
+            if( newNumWorkGroups > numWorkGroups )
+            {
+                return CL_INVALID_WORK_GROUP_SIZE;
+            }
+        }
+
         if( dispatchConfig->global_work_offset )
         {
             global_work_offset.assign(
@@ -1148,7 +1098,9 @@ struct NDRangeKernel : Command
     cl_uint work_dim = 0;
 #if defined(cl_khr_command_buffer_mutable_dispatch)
     cl_mutable_dispatch_fields_khr mutableFields = 0;
-#endif
+    cl_mutable_dispatch_asserts_khr mutableAsserts = 0;
+    size_t  numWorkGroups = 0;
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
     std::vector<cl_command_buffer_properties_khr> properties;
     std::vector<size_t> global_work_offset;
     std::vector<size_t> global_work_size;
@@ -1159,6 +1111,29 @@ struct NDRangeKernel : Command
         cl_command_buffer_khr cmdbuf,
         cl_command_queue queue)
         : Command(cmdbuf, queue, CL_COMMAND_NDRANGE_KERNEL) {};
+
+    static size_t getNumWorkGroups(
+        cl_uint work_dim,
+        const size_t* global_work_size,
+        const size_t* local_work_size )
+    {
+        if( work_dim == 0 || 
+            global_work_size == nullptr ||
+            local_work_size == nullptr )
+        {
+            return 1;
+        }
+
+        size_t  count = 1;
+        for( cl_uint i = 0; i < work_dim; i++ )
+        {
+            size_t  gws = global_work_size[i];
+            size_t  lws = local_work_size[i];
+            count *= (gws + lws - 1) / lws;
+        }
+
+        return count;
+    }
 };
 
 typedef struct _cl_command_buffer_khr
@@ -1174,6 +1149,9 @@ typedef struct _cl_command_buffer_khr
 
         ptrdiff_t numProperties = 0;
         cl_command_buffer_flags_khr flags = 0;
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+        cl_mutable_dispatch_asserts_khr mutableDispatchAsserts = 0;
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
 
         if( num_queues != 1 || queues == NULL )
         {
@@ -1183,6 +1161,7 @@ typedef struct _cl_command_buffer_khr
         {
             const cl_command_buffer_properties_khr* check = properties;
             bool found_CL_COMMAND_BUFFER_FLAGS_KHR = false;
+            bool found_CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR = false;
             while( errorCode == CL_SUCCESS && check[0] != 0 )
             {
                 cl_int  property = (cl_int)check[0];
@@ -1201,6 +1180,20 @@ typedef struct _cl_command_buffer_khr
                         check += 2;
                     }
                     break;
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+                case CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR:
+                    if( found_CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR )
+                    {
+                        errorCode = CL_INVALID_VALUE;
+                    }
+                    else
+                    {
+                        found_CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR = true;
+                        mutableDispatchAsserts = ((const cl_mutable_dispatch_asserts_khr*)(check + 1))[0];
+                        check += 2;
+                    }
+                    break;
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
                 default:
                     errorCode = CL_INVALID_VALUE;
                     break;
@@ -1213,7 +1206,11 @@ typedef struct _cl_command_buffer_khr
             errcode_ret[0] = errorCode;
         }
         if( errorCode == CL_SUCCESS) {
-            cmdbuf = new _cl_command_buffer_khr(flags);
+            cmdbuf = new _cl_command_buffer_khr(flags
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+                , mutableDispatchAsserts
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
+            );
             cmdbuf->Queues.reserve(num_queues);
             cmdbuf->Queues.insert(
                 cmdbuf->Queues.begin(),
@@ -1281,6 +1278,13 @@ typedef struct _cl_command_buffer_khr
         return Queues[0];
     }
 
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+    cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts() const
+    {
+        return MutableDispatchAsserts;
+    }
+#endif
+
     cl_int  getInfo(
                 cl_command_buffer_info_khr param_name,
                 size_t param_value_size,
@@ -1362,7 +1366,7 @@ typedef struct _cl_command_buffer_khr
                     ptr );
             }
             break;
-#endif
+#endif // defined(CL_COMMAND_BUFFER_CONTEXT_KHR)
         default:
             break;
         }
@@ -1550,6 +1554,7 @@ typedef struct _cl_command_buffer_khr
                 }
                 
                 if( cl_int errorCode = ((NDRangeKernel*)dispatchConfig->command)->mutate(
+                        MutableDispatchAsserts,
                         dispatchConfig ) )
                 {
                     return errorCode;
@@ -1573,24 +1578,35 @@ typedef struct _cl_command_buffer_khr
     std::vector<cl_command_buffer_properties_khr>   Properties;
     cl_command_buffer_state_khr State;
     cl_command_buffer_flags_khr Flags;
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+    cl_mutable_dispatch_asserts_khr MutableDispatchAsserts;
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
     std::atomic<uint32_t> RefCount;
 
     std::vector<std::unique_ptr<Command>> Commands;
     std::atomic<uint32_t> NextSyncPoint;
 
-    _cl_command_buffer_khr(cl_command_buffer_flags_khr flags) :
+    _cl_command_buffer_khr(
+            cl_command_buffer_flags_khr flags
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+            , cl_mutable_dispatch_asserts_khr mutableDispatchAsserts
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
+            ) :
         Magic(cMagic),
         State(CL_COMMAND_BUFFER_STATE_RECORDING_KHR),
         Flags(flags),
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+        MutableDispatchAsserts(mutableDispatchAsserts),
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
         RefCount(1),
         NextSyncPoint(1) {}
 } CommandBuffer;
 
 ///////////////////////////////////////////////////////////////////////////////
 //
-// We need to define the mutable command constructor separately and after the
-// definition of a command buffer because we will call into the command buffer
-// to get the queue if the passed-in queue is NULL.
+// These functions need to be defined separately and after the definition of a
+// command buffer because they call into the command buffer.
+
 _cl_mutable_command_khr::_cl_mutable_command_khr(
     cl_command_buffer_khr cmdbuf,
     cl_command_queue queue,
@@ -1600,6 +1616,138 @@ _cl_mutable_command_khr::_cl_mutable_command_khr(
     CmdBuf(cmdbuf),
     Queue(queue ? queue : cmdbuf->getQueue()) {}
 
+std::unique_ptr<NDRangeKernel> NDRangeKernel::create(
+    const cl_ndrange_kernel_command_properties_khr* properties,
+    cl_command_buffer_khr cmdbuf,
+    cl_command_queue queue,
+    cl_kernel kernel,
+    cl_uint work_dim,
+    const size_t* global_work_offset,
+    const size_t* global_work_size,
+    const size_t* local_work_size,
+    cl_int& errorCode)
+{
+    errorCode = CL_SUCCESS;
+
+    ptrdiff_t numProperties = 0;
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+    cl_mutable_dispatch_fields_khr mutableFields = g_MutableDispatchCaps;
+    cl_mutable_dispatch_asserts_khr mutableAsserts = 0;
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
+
+    if( properties )
+    {
+        const cl_ndrange_kernel_command_properties_khr* check = properties;
+        bool found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR = false;
+        bool found_CL_MUTABLE_DISPATCH_ASSERTS_KHR = false;
+        while( errorCode == CL_SUCCESS && check[0] != 0 )
+        {
+            cl_int  property = (cl_int)check[0];
+            switch( property )
+            {
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+            case CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR:
+                if( found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR )
+                {
+                    errorCode = CL_INVALID_VALUE;
+                    return nullptr;
+                }
+                else
+                {
+                    found_CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR = true;
+                    mutableFields = ((const cl_mutable_dispatch_fields_khr*)(check + 1))[0];
+                    check += 2;
+                }
+                break;
+            case CL_MUTABLE_DISPATCH_ASSERTS_KHR:
+                if( found_CL_MUTABLE_DISPATCH_ASSERTS_KHR )
+                {
+                    errorCode = CL_INVALID_VALUE;
+                    return nullptr;
+                }
+                else
+                {
+                    found_CL_MUTABLE_DISPATCH_ASSERTS_KHR = true;
+                    mutableAsserts = ((const cl_mutable_dispatch_asserts_khr*)(check + 1))[0];
+                    check += 2;
+                }
+                break;
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
+            default:
+                errorCode = CL_INVALID_VALUE;
+                return nullptr;
+            }
+        }
+        numProperties = check - properties + 1;
+    }
+
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+    if( local_work_size == nullptr )
+    {
+        const auto mutableAssertsCmdBuf = cmdbuf->getMutableDispatchAsserts();
+        if( mutableAssertsCmdBuf & CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR ||
+            mutableAsserts & CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR )
+        {
+            errorCode = CL_INVALID_WORK_GROUP_SIZE;
+            return nullptr;
+        }
+    }
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
+
+    auto command = std::unique_ptr<NDRangeKernel>(
+        new NDRangeKernel(cmdbuf, queue));
+
+    command->original_kernel = kernel;
+    command->kernel = g_pNextDispatch->clCloneKernel(kernel, NULL);
+    command->work_dim = work_dim;
+
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+    command->mutableFields = mutableFields;
+    command->mutableAsserts = mutableAsserts;
+    command->numWorkGroups = getNumWorkGroups(
+        work_dim,
+        global_work_size,
+        local_work_size );
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
+
+    command->properties.reserve(numProperties);
+    command->properties.insert(
+        command->properties.begin(),
+        properties,
+        properties + numProperties );
+
+    if( global_work_offset )
+    {
+        command->global_work_offset.reserve(work_dim);
+        command->global_work_offset.insert(
+            command->global_work_offset.begin(),
+            global_work_offset,
+            global_work_offset + work_dim);
+    }
+
+    if( global_work_size )
+    {
+        command->global_work_size.reserve(work_dim);
+        command->global_work_size.insert(
+            command->global_work_size.begin(),
+            global_work_size,
+            global_work_size + work_dim);
+    }
+
+    if( local_work_size )
+    {
+        command->local_work_size.reserve(work_dim);
+        command->local_work_size.insert(
+            command->local_work_size.begin(),
+            local_work_size,
+            local_work_size + work_dim);
+    }
+
+    g_pNextDispatch->clRetainKernel(command->original_kernel);
+
+    return command;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 //
 // cl_khr_command_buffer
@@ -2230,7 +2378,7 @@ cl_int CL_API_CALL clCommandNDRangeKernelKHR_EMU(
     {
         return CL_INVALID_VALUE;
     }
-#endif // cl_khr_command_buffer_mutable_dispatch
+#endif // !defined(cl_khr_command_buffer_mutable_dispatch)
 
     cl_int errorCode = CL_SUCCESS;
     auto command = NDRangeKernel::create(
@@ -2300,7 +2448,7 @@ cl_command_buffer_khr CL_API_CALL clRemapCommandBufferKHR_EMU(
     return nullptr;
 }
 
-#endif
+#endif // defined(cl_khr_command_buffer_multi_device)
 
 #if defined(cl_khr_command_buffer_mutable_dispatch)
 
@@ -2402,7 +2550,7 @@ bool clGetDeviceInfo_override(
 #if defined(cl_khr_command_buffer_mutable_dispatch)
                 newExtensions += ' ';
                 newExtensions += CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME;
-#endif
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
 
                 std::string oldExtensions(deviceExtensions.data());
 
@@ -2502,7 +2650,7 @@ bool clGetDeviceInfo_override(
 
                     extension.version = version_cl_khr_command_buffer_mutable_dispatch;
                 }
-#endif
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
 
                 auto ptr = (cl_name_version*)param_value;
                 cl_int errorCode = writeVectorToMemory(
@@ -2602,7 +2750,7 @@ bool clGetDeviceInfo_override(
             return true;
         }
         break;
-#endif
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
     default: break;
     }
 
@@ -2734,7 +2882,7 @@ bool clGetPlatformInfo_override(
 #if defined(cl_khr_command_buffer_mutable_dispatch)
                 newExtensions += ' ';
                 newExtensions += CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME;
-#endif
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
 
                 std::string oldExtensions(platformExtensions.data());
 
@@ -2834,7 +2982,7 @@ bool clGetPlatformInfo_override(
 
                     extension.version = version_cl_khr_command_buffer_mutable_dispatch;
                 }
-#endif
+#endif // defined(cl_khr_command_buffer_mutable_dispatch)
 
                 auto ptr = (cl_name_version*)param_value;
                 cl_int errorCode = writeVectorToMemory(
diff --git a/samples/13_mutablecommandbuffers/main.cpp b/samples/13_mutablecommandbuffers/main.cpp
index a307298..6139754 100644
--- a/samples/13_mutablecommandbuffers/main.cpp
+++ b/samples/13_mutablecommandbuffers/main.cpp
@@ -173,6 +173,10 @@ int main(
         NULL );
     printf("\tMutable Dispatch Capabilities:\n");
     PrintMutableDispatchCapabilities(mutableCaps);
+    if (!(mutableCaps & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR)) {
+        printf("Device does not support modifying the global work size, exiting.\n");
+        return -1;
+    }
 
     cl::Context context{devices[deviceIndex]};
     cl::CommandQueue commandQueue{context, devices[deviceIndex]};
diff --git a/samples/15_mutablecommandbufferasserts/CMakeLists.txt b/samples/15_mutablecommandbufferasserts/CMakeLists.txt
new file mode 100644
index 0000000..f8e3978
--- /dev/null
+++ b/samples/15_mutablecommandbufferasserts/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Copyright (c) 2022-2024 Ben Ashbaugh
+#
+# SPDX-License-Identifier: MIT
+
+add_opencl_sample(
+    TEST
+    NUMBER 12
+    TARGET mutablecommandbufferasserts
+    VERSION 120
+    SOURCES main.cpp
+    LIBS OpenCLExt)
diff --git a/samples/15_mutablecommandbufferasserts/README.md b/samples/15_mutablecommandbufferasserts/README.md
new file mode 100644
index 0000000..12ea5a4
--- /dev/null
+++ b/samples/15_mutablecommandbufferasserts/README.md
@@ -0,0 +1,30 @@
+# mutablecommandbufferasserts
+
+## Sample Purpose
+
+This is an intermediate-level sample that demonstrates how to pass assertions guaranteeing certain behavior when modifying command buffers using the OpenCL extension [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch).
+As of this writing, `cl_khr_command_buffer_mutable_dispatch` is a provisional extension.
+This sample uses the functionality described in v0.9.1 of the extension.
+
+This is an optional extension and some devices may not support `cl_khr_command_buffer_mutable_dispatch`, but the sample may still run using the [cl_khr_command_buffer emulation layer](../../layers/10_cmdbufemu).
+
+This sample requires the OpenCL Extension Loader to get the extension APIs for command buffers.
+
+## Key APIs and Concepts
+
+This sample demonstrates how to pass mutable dispatch assertions when command buffer is created or when an ND-range kernel command is recorded into a command buffer.
+
+```
+CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR
+CL_MUTABLE_DISPATCH_ASSERTS_KHR
+CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR
+```
+
+## Command Line Options
+
+| Option | Default Value | Description |
+|:--|:-:|:--|
+| `-d <index>` | 0 | Specify the index of the OpenCL device in the platform to execute on the sample on.
+| `-p <index>` | 0 | Specify the index of the OpenCL platform to execute the sample on.
+| `--noCmdBufAssert` | N/A | Do not pass an assertion when the command buffer is created (`CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR`)
+| `--noCmdAssert` | N/A | Do not pass an assertion when the command is recorded into the command buffer (`CL_MUTABLE_DISPATCH_ASSERTS_KHR`)
diff --git a/samples/15_mutablecommandbufferasserts/main.cpp b/samples/15_mutablecommandbufferasserts/main.cpp
new file mode 100644
index 0000000..7c3ad21
--- /dev/null
+++ b/samples/15_mutablecommandbufferasserts/main.cpp
@@ -0,0 +1,300 @@
+/*
+// Copyright (c) 2022-2024 Ben Ashbaugh
+//
+// SPDX-License-Identifier: MIT
+*/
+
+#include <popl/popl.hpp>
+
+#include <CL/opencl.hpp>
+
+#include <cinttypes>
+
+#include "util.hpp"
+
+#if defined(cl_khr_command_buffer_mutable_dispatch)
+
+#if !defined(CL_MUTABLE_DISPATCH_ASSERTS_KHR)
+typedef cl_bitfield         cl_mutable_dispatch_asserts_khr;
+#define CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR  0x12B7
+#define CL_MUTABLE_DISPATCH_ASSERTS_KHR                 0x12B8
+#define CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR (1 << 0)
+#endif // !defined(CL_MUTABLE_DISPATCH_ASSERTS_KHR)
+
+const size_t    gwx = 1024;
+const size_t    lwx = 16;
+
+static const char kernelString[] = R"CLC(
+kernel void CopyBuffer( global uint* dst, global uint* src )
+{
+    uint id = get_global_id(0);
+    dst[id] = src[id];
+}
+)CLC";
+
+int main(
+    int argc,
+    char** argv )
+{
+    int platformIndex = 0;
+    int deviceIndex = 0;
+    bool noCmdBufAssert = false;
+    bool noCmdAssert = false;
+
+    {
+        popl::OptionParser op("Supported Options");
+        op.add<popl::Value<int>>("p", "platform", "Platform Index", platformIndex, &platformIndex);
+        op.add<popl::Value<int>>("d", "device", "Device Index", deviceIndex, &deviceIndex);
+        op.add<popl::Switch>("", "noCmdBufAssert", "Skip Command Buffer Assert", &noCmdBufAssert);
+        op.add<popl::Switch>("", "noCmdAssert", "Skip Command Assert", &noCmdAssert);
+
+        bool printUsage = false;
+        try {
+            op.parse(argc, argv);
+        } catch (std::exception& e) {
+            fprintf(stderr, "Error: %s\n\n", e.what());
+            printUsage = true;
+        }
+
+        if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) {
+            fprintf(stderr,
+                "Usage: mutablecommandbufferasserts [options]\n"
+                "%s", op.help().c_str());
+            return -1;
+        }
+    }
+
+    std::vector<cl::Platform> platforms;
+    cl::Platform::get(&platforms);
+
+    printf("Running on platform: %s\n",
+        platforms[platformIndex].getInfo<CL_PLATFORM_NAME>().c_str() );
+
+    std::vector<cl::Device> devices;
+    platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+
+    printf("Running on device: %s\n",
+        devices[deviceIndex].getInfo<CL_DEVICE_NAME>().c_str() );
+
+    // device queries:
+
+    bool has_cl_khr_command_buffer =
+        checkDeviceForExtension(devices[deviceIndex], CL_KHR_COMMAND_BUFFER_EXTENSION_NAME);
+    if (has_cl_khr_command_buffer) {
+        printf("Device supports " CL_KHR_COMMAND_BUFFER_EXTENSION_NAME ".\n");
+    } else {
+        printf("Device does not support " CL_KHR_COMMAND_BUFFER_EXTENSION_NAME ", exiting.\n");
+        return -1;
+    }
+    bool has_cl_khr_command_buffer_mutable_dispatch =
+        checkDeviceForExtension(devices[deviceIndex], CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME);
+    if (has_cl_khr_command_buffer_mutable_dispatch) {
+        printf("Device supports " CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME ".\n");
+    } else {
+        printf("Device does not support " CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME ", exiting.\n");
+        return -1;
+    }
+
+    cl_mutable_dispatch_fields_khr mutableCaps = 0;
+    clGetDeviceInfo(
+        devices[deviceIndex](),
+        CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+        sizeof(mutableCaps),
+        &mutableCaps,
+        NULL );
+    if (!(mutableCaps & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR)) {
+        printf("Device does not support modifying the global work size, exiting.\n");
+        return -1;
+    }
+
+    printf("Adding Command Buffer Assert?  %s\n", noCmdBufAssert ? "No" : "Yes");
+    printf("Adding Command Assert?  %s\n", noCmdAssert ? "No" : "Yes");
+
+    cl::Context context{devices[deviceIndex]};
+    cl::CommandQueue commandQueue{context, devices[deviceIndex]};
+
+    cl::Program program{ context, kernelString };
+    program.build();
+    cl::Kernel kernel = cl::Kernel{ program, "CopyBuffer" };
+
+    cl::Buffer deviceMemSrc = cl::Buffer{
+        context,
+        CL_MEM_ALLOC_HOST_PTR,
+        gwx * sizeof( cl_uint ) };
+
+    cl::Buffer deviceMemDst = cl::Buffer{
+        context,
+        CL_MEM_ALLOC_HOST_PTR,
+        gwx * sizeof( cl_uint ) };
+
+    // initialization
+    {
+        cl_uint*    pSrc = (cl_uint*)commandQueue.enqueueMapBuffer(
+            deviceMemSrc,
+            CL_TRUE,
+            CL_MAP_WRITE_INVALIDATE_REGION,
+            0,
+            gwx * sizeof(cl_uint) );
+
+        for( size_t i = 0; i < gwx; i++ )
+        {
+            pSrc[i] = (cl_uint)(i);
+        }
+
+        commandQueue.enqueueUnmapMemObject(
+            deviceMemSrc,
+            pSrc );
+    }
+
+    const cl_command_buffer_properties_khr cbprops[] = {
+        CL_COMMAND_BUFFER_FLAGS_KHR,
+        CL_COMMAND_BUFFER_MUTABLE_KHR,
+        CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR,
+        noCmdBufAssert
+            ? 0
+            : (cl_command_buffer_properties_khr)
+                  CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR,
+        0,
+    };
+    cl_command_buffer_khr cmdbuf = clCreateCommandBufferKHR(
+        1,
+        &commandQueue(),
+        cbprops,
+        NULL);
+
+    kernel.setArg(0, deviceMemDst);
+    kernel.setArg(1, deviceMemSrc);
+    const cl_ndrange_kernel_command_properties_khr cmdprops[] = {
+        CL_MUTABLE_DISPATCH_ASSERTS_KHR,
+        noCmdAssert
+            ? 0
+            : (cl_ndrange_kernel_command_properties_khr)
+                  CL_MUTABLE_DISPATCH_ASSERT_NO_ADDITIONAL_WORK_GROUPS_KHR,
+        0,
+    };
+    const size_t gwx_x2 = gwx * 2;
+    cl_sync_point_khr sync_point;
+    cl_mutable_command_khr command;
+    clCommandNDRangeKernelKHR(
+        cmdbuf,     // command_buffer
+        NULL,       // command_queue - note NULL!
+        cmdprops,   // properties
+        kernel(),   // kernel
+        1,          // work_dim
+        NULL,       // global_work_offset
+        &gwx_x2,    // global_work_size
+        &lwx,       // local_work_size
+        0,          // num_sync_points_in_wait_list
+        NULL,       // sync_point_wait_list
+        &sync_point,// sync_point
+        &command);  // mutable_handle
+
+    clFinalizeCommandBufferKHR(cmdbuf);
+
+    // mutate the command buffer, adding work-groups.
+    // This should generate an error with mutable dispatch asserts.
+    {
+        const size_t gwx_x4 = gwx * 4;
+        cl_mutable_dispatch_config_khr dispatchConfig = {};
+        dispatchConfig.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR;
+        dispatchConfig.command = command;
+        dispatchConfig.global_work_size = &gwx_x4;
+
+        cl_mutable_base_config_khr baseConfig = {};
+        baseConfig.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR;
+        baseConfig.num_mutable_dispatch = 1;
+        baseConfig.mutable_dispatch_list = &dispatchConfig;
+
+        cl_int check = clUpdateMutableCommandsKHR(
+            cmdbuf,
+            &baseConfig );
+        printf("clUpdateMutableCommandsKHR() to increase work-groups returned %s.\n",
+            check == CL_SUCCESS ? "SUCCESS" : "an ERROR");
+    }
+
+    // mutate the command buffer, reducing work-groups.
+    // This should not generate an error even with mutable dispatch asserts.
+    {
+        cl_mutable_dispatch_config_khr dispatchConfig = {};
+        dispatchConfig.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR;
+        dispatchConfig.command = command;
+        dispatchConfig.global_work_size = &gwx;
+
+        cl_mutable_base_config_khr baseConfig = {};
+        baseConfig.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR;
+        baseConfig.num_mutable_dispatch = 1;
+        baseConfig.mutable_dispatch_list = &dispatchConfig;
+
+        cl_int check = clUpdateMutableCommandsKHR(
+            cmdbuf,
+            &baseConfig );
+        printf("clUpdateMutableCommandsKHR() to reduce work-groups returned %s.\n",
+            check == CL_SUCCESS ? "SUCCESS" : "an ERROR");
+    }
+
+    clEnqueueCommandBufferKHR(
+        0,
+        NULL,
+        cmdbuf,
+        0,
+        NULL,
+        NULL);
+
+    // verification
+    {
+        const cl_uint*  pDst = (const cl_uint*)commandQueue.enqueueMapBuffer(
+            deviceMemDst,
+            CL_TRUE,
+            CL_MAP_READ,
+            0,
+            gwx * sizeof(cl_uint) );
+
+        unsigned int    mismatches = 0;
+
+        for( size_t i = 0; i < gwx; i++ )
+        {
+            if( pDst[i] != i )
+            {
+                if( mismatches < 16 )
+                {
+                    fprintf(stderr, "MisMatch!  dst[%d] == %08X, want %08X\n",
+                        (unsigned int)i,
+                        pDst[i],
+                        (unsigned int)i );
+                }
+                mismatches++;
+            }
+        }
+
+        if( mismatches )
+        {
+            fprintf(stderr, "Error: Found %d mismatches / %d values!!!\n",
+                mismatches,
+                (unsigned int)gwx );
+        }
+        else
+        {
+            printf("Success.\n");
+        }
+
+        commandQueue.enqueueUnmapMemObject(
+            deviceMemDst,
+            (void*)pDst );
+    }
+
+    clReleaseCommandBufferKHR(cmdbuf);
+
+    return 0;
+}
+
+#else
+
+#pragma message("mutablecommandbuffers: cl_khr_command_buffer_mutable_dispatch not found.  Please update your OpenCL headers.")
+
+int main()
+{
+    printf("mutablecommandbuffers: cl_khr_command_buffer_mutable_dispatch not found.  Please update your OpenCL headers.\n");
+    return 0;
+};
+
+#endif
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index b327b34..c66bf41 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -87,4 +87,5 @@ if(BUILD_EXTENSION_SAMPLES)
     add_subdirectory( 12_commandbufferspp )
     add_subdirectory( 13_mutablecommandbuffers )
     add_subdirectory( 14_ooqcommandbuffers )
+    add_subdirectory( 15_mutablecommandbufferasserts )
 endif()