From 8e6c519354a7d03cbd3f8b3438ad4ce212b5f02c Mon Sep 17 00:00:00 2001
From: Ewan Crawford <ewan@codeplay.com>
Date: Tue, 25 Jun 2024 17:40:32 +0100
Subject: [PATCH 01/18] Negative test for CL_INVALID_QUEUE_PROPERTIES (#1935)

Test that verifies `CL_INVALID_QUEUE_PROPERTIES` is returned from
`clCreateCommandQueue`, `clCreateCommandQueueWithProperties`, and
`clCreateCommandQueueWithPropertiesKHR` to match spec wording

> CL_INVALID_QUEUE_PROPERTIES if values specified in properties are
valid but are not supported by the device.
---
 test_conformance/api/CMakeLists.txt     |   1 +
 test_conformance/api/main.cpp           |   5 +
 test_conformance/api/negative_queue.cpp | 174 ++++++++++++++++++++++++
 test_conformance/api/procs.h            |  11 ++
 4 files changed, 191 insertions(+)
 create mode 100644 test_conformance/api/negative_queue.cpp

diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt
index 5b1f491ce3..96d12f435d 100644
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt
@@ -3,6 +3,7 @@ set(MODULE_NAME API)
 set(${MODULE_NAME}_SOURCES
          main.cpp
          negative_platform.cpp
+         negative_queue.cpp
          test_api_consistency.cpp
          test_bool.cpp
          test_retain.cpp
diff --git a/test_conformance/api/main.cpp b/test_conformance/api/main.cpp
index e0900df07c..cdbf1f799a 100644
--- a/test_conformance/api/main.cpp
+++ b/test_conformance/api/main.cpp
@@ -152,6 +152,11 @@ test_definition test_list[] = {
     ADD_TEST(work_group_suggested_local_size_1D),
     ADD_TEST(work_group_suggested_local_size_2D),
     ADD_TEST(work_group_suggested_local_size_3D),
+
+    ADD_TEST(negative_create_command_queue),
+    ADD_TEST_VERSION(negative_create_command_queue_with_properties,
+                     Version(2, 0)),
+    ADD_TEST(negative_create_command_queue_with_properties_khr),
 };
 
 const int test_num = ARRAY_SIZE(test_list);
diff --git a/test_conformance/api/negative_queue.cpp b/test_conformance/api/negative_queue.cpp
new file mode 100644
index 0000000000..f3b4fb2c90
--- /dev/null
+++ b/test_conformance/api/negative_queue.cpp
@@ -0,0 +1,174 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testBase.h"
+#include "harness/typeWrappers.h"
+
+int test_negative_create_command_queue(cl_device_id deviceID,
+                                       cl_context context,
+                                       cl_command_queue queue, int num_elements)
+{
+    cl_command_queue_properties device_props = 0;
+    cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES,
+                                   sizeof(device_props), &device_props, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional property to
+    // clCreateCommandQueue, CL_QUEUE_PROFILING_ENABLE is mandatory.
+    const bool out_of_order_device_support =
+        device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+    if (out_of_order_device_support)
+    {
+        // Early return as we can't check correct error is returned for
+        // unsupported property.
+        return TEST_PASS;
+    }
+
+    // Try create a command queue with out-of-order property and check return
+    // code
+    cl_int test_error = CL_SUCCESS;
+    clCommandQueueWrapper test_queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &test_error);
+
+    test_failure_error_ret(
+        test_error, CL_INVALID_QUEUE_PROPERTIES,
+        "clCreateCommandQueue should return CL_INVALID_QUEUE_PROPERTIES if "
+        "values specified in properties are valid but are not supported by "
+        "the "
+        "device.",
+        TEST_FAIL);
+    return TEST_PASS;
+}
+
+int test_negative_create_command_queue_with_properties(cl_device_id deviceID,
+                                                       cl_context context,
+                                                       cl_command_queue queue,
+                                                       int num_elements)
+{
+    cl_command_queue_properties device_props = 0;
+    cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES,
+                                   sizeof(device_props), &device_props, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+    cl_command_queue_properties device_on_host_props = 0;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
+                            sizeof(device_on_host_props), &device_on_host_props,
+                            NULL);
+    test_error(error,
+               "clGetDeviceInfo for CL_DEVICE_QUEUE_ON_HOST_PROPERTIES failed");
+
+    if (device_on_host_props != device_props)
+    {
+        log_error(
+            "ERROR: CL_DEVICE_QUEUE_PROPERTIES and "
+            "CL_DEVICE_QUEUE_ON_HOST_PROPERTIES properties should match\n");
+        return TEST_FAIL;
+    }
+
+    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional host-queue
+    // property to clCreateCommandQueueWithProperties,
+    // CL_QUEUE_PROFILING_ENABLE is mandatory.
+    const bool out_of_order_device_support =
+        device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+    if (out_of_order_device_support)
+    {
+        // Early return as we can't check correct error is returned for
+        // unsupported property.
+        return TEST_PASS;
+    }
+
+    // Try create a command queue with out-of-order property and check return
+    // code
+    cl_command_queue_properties queue_prop_def[] = {
+        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
+    };
+
+    cl_int test_error = CL_SUCCESS;
+    clCommandQueueWrapper test_queue = clCreateCommandQueueWithProperties(
+        context, deviceID, queue_prop_def, &test_error);
+
+    test_failure_error_ret(test_error, CL_INVALID_QUEUE_PROPERTIES,
+                           "clCreateCommandQueueWithProperties should "
+                           "return CL_INVALID_QUEUE_PROPERTIES if "
+                           "values specified in properties are valid but "
+                           "are not supported by the "
+                           "device.",
+                           TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+int test_negative_create_command_queue_with_properties_khr(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_create_command_queue"))
+    {
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    cl_platform_id platform;
+    cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
+                                   sizeof(cl_platform_id), &platform, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+
+    clCreateCommandQueueWithPropertiesKHR_fn
+        clCreateCommandQueueWithPropertiesKHR =
+            (clCreateCommandQueueWithPropertiesKHR_fn)
+                clGetExtensionFunctionAddressForPlatform(
+                    platform, "clCreateCommandQueueWithPropertiesKHR");
+    if (clCreateCommandQueueWithPropertiesKHR == NULL)
+    {
+        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
+        return -1;
+    }
+
+    cl_command_queue_properties device_props = 0;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES,
+                            sizeof(device_props), &device_props, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional host-queue
+    // property to clCreateCommandQueueWithPropertiesKHR,
+    // CL_QUEUE_PROFILING_ENABLE is mandatory.
+    const bool out_of_order_device_support =
+        device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+    if (out_of_order_device_support)
+    {
+        // Early return as we can't check correct error is returned for
+        // unsupported property.
+        return TEST_PASS;
+    }
+
+    // Try create a command queue with out-of-order property and check return
+    // code
+    cl_queue_properties_khr queue_prop_def[] = {
+        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
+    };
+
+    cl_int test_error = CL_SUCCESS;
+    clCommandQueueWrapper test_khr_queue =
+        clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def,
+                                              &test_error);
+
+    test_failure_error_ret(test_error, CL_INVALID_QUEUE_PROPERTIES,
+                           "clCreateCommandQueueWithPropertiesKHR should "
+                           "return CL_INVALID_QUEUE_PROPERTIES if "
+                           "values specified in properties are valid but "
+                           "are not supported by the "
+                           "device.",
+                           TEST_FAIL);
+    return TEST_PASS;
+}
diff --git a/test_conformance/api/procs.h b/test_conformance/api/procs.h
index 320ad65a1e..22426be1b8 100644
--- a/test_conformance/api/procs.h
+++ b/test_conformance/api/procs.h
@@ -217,3 +217,14 @@ extern int test_work_group_suggested_local_size_3D(cl_device_id device,
                                                    cl_context context,
                                                    cl_command_queue queue,
                                                    int n_elems);
+
+extern int test_negative_create_command_queue(cl_device_id deviceID,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements);
+extern int test_negative_create_command_queue_with_properties(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_create_command_queue_with_properties_khr(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    int num_elements);

From e2c7e901b8cdf2ec6ebed685224ad1e56b89a065 Mon Sep 17 00:00:00 2001
From: Pedro Olsen Ferreira <silverclaw@users.noreply.github.com>
Date: Tue, 25 Jun 2024 17:41:23 +0100
Subject: [PATCH 02/18] Add special test for generic address space atomics
 (#1959)

The atomic operations are tested with generic pointers but in a way
where the compiler can infer the original pointer address space. This
commit adds tests specifically for the case where the compiler cannot
make inferences.
Test that the correct address is used with atomics when the address is
group variant and invariant.
---
 .../generic_address_space/CMakeLists.txt      |   1 +
 .../generic_address_space/atomic_tests.cpp    | 224 ++++++++++++++++++
 .../generic_address_space/main.cpp            |  50 ++--
 3 files changed, 254 insertions(+), 21 deletions(-)
 create mode 100644 test_conformance/generic_address_space/atomic_tests.cpp

diff --git a/test_conformance/generic_address_space/CMakeLists.txt b/test_conformance/generic_address_space/CMakeLists.txt
index e74bcf4a57..951c5ab4ed 100644
--- a/test_conformance/generic_address_space/CMakeLists.txt
+++ b/test_conformance/generic_address_space/CMakeLists.txt
@@ -2,6 +2,7 @@ set(MODULE_NAME GENERIC_ADDRESS_SPACE)
 
 set(${MODULE_NAME}_SOURCES
     advanced_tests.cpp
+    atomic_tests.cpp
     basic_tests.cpp
     main.cpp
     stress_tests.cpp
diff --git a/test_conformance/generic_address_space/atomic_tests.cpp b/test_conformance/generic_address_space/atomic_tests.cpp
new file mode 100644
index 0000000000..a24c6ae2fc
--- /dev/null
+++ b/test_conformance/generic_address_space/atomic_tests.cpp
@@ -0,0 +1,224 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#include "errorHelpers.h"
+#include "typeWrappers.h"
+
+namespace {
+// In this source, each workgroup will generate one value.
+// Every other workgroup will use either a global or local
+// pointer on an atomic operation.
+const char* KernelSourceInvariant = R"OpenCLC(
+kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr) {
+    int gid = get_global_id(0);
+    int tid = get_local_id(0);
+    int wgid = get_group_id(0);
+    int wgsize = get_local_size(0);
+
+    if (tid == 0) atomic_store(localPtr, 0);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Initialise the generic pointer to
+    // the global.
+    generic atomic_int* ptr = globalPtr + wgid;
+
+    // In a workgroup-invariant way, select a localPtr instead.
+    if ((wgid % 2) == 0)
+        ptr = localPtr;
+
+    int inc = atomic_fetch_add(ptr, 1);
+
+    // In the cases where the local memory ptr was used,
+    // save off the final value.
+    if ((wgid % 2) == 0 && inc == (wgsize-1))
+        atomic_store(&globalPtr[wgid], inc);
+}
+)OpenCLC";
+
+// In this source, each workgroup will generate two values.
+// Every other work item in the workgroup will select either
+// a local or global memory pointer and perform an atomic
+// operation on that.
+const char* KernelSourceVariant = R"OpenCLC(
+kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr) {
+    int gid = get_global_id(0);
+    int tid = get_local_id(0);
+    int wgid = get_group_id(0);
+    int wgsize = get_local_size(0);
+
+    if (tid == 0) atomic_store(localPtr, 0);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Initialise the generic pointer to
+    // the global.  Two values are written per WG.
+    generic atomic_int* ptr = globalPtr + (wgid * 2);
+
+    // In a workgroup-invariant way, select a localPtr instead.
+    if ((tid % 2) == 0)
+        ptr = localPtr;
+
+    atomic_fetch_add(ptr, 1);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // In the cases where the local memory ptr was used,
+    // save off the final value.
+    if (tid == 0)
+        atomic_store(&globalPtr[(wgid * 2) + 1], atomic_load(localPtr));
+}
+)OpenCLC";
+}
+
+int test_generic_atomics_invariant(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int)
+{
+    const auto version = get_device_cl_version(deviceID);
+
+    if (version < Version(2, 0)) return TEST_SKIPPED_ITSELF;
+
+    cl_int err = CL_SUCCESS;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &KernelSourceInvariant, "testKernel");
+    test_error(err, "Failed to create test kernel");
+
+    size_t wgSize, retSize;
+    // Attempt to find the simd unit size for the device.
+    err = clGetKernelWorkGroupInfo(kernel, deviceID,
+                                   CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
+                                   sizeof(wgSize), &wgSize, &retSize);
+    test_error(err, "clGetKernelWorkGroupInfo failed");
+
+    // How many workgroups to run for the test.
+    const int numWGs = 2;
+    const size_t bufferSize = numWGs * sizeof(cl_uint);
+    clMemWrapper buffer =
+        clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, nullptr, &err);
+    test_error(err, "clCreateBuffer failed");
+    const cl_int zero = 0;
+    err = clEnqueueFillBuffer(queue, buffer, &zero, sizeof(zero), 0, bufferSize,
+                              0, nullptr, nullptr);
+    test_error(err, "clEnqueueFillBuffer failed");
+
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
+    test_error(err, "clSetKernelArg failed");
+    err = clSetKernelArg(kernel, 1, bufferSize, nullptr);
+    test_error(err, "clSetKernelArg failed");
+
+    const size_t globalSize = wgSize * numWGs;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &globalSize,
+                                 &wgSize, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed");
+
+    std::vector<cl_int> results(numWGs);
+    err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize,
+                              results.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed");
+
+    clFinish(queue);
+
+    for (size_t i = 0; i < numWGs; ++i)
+    {
+        const cl_int expected = ((i % 2) == 0) ? wgSize - 1 : wgSize;
+        if (results[i] != expected)
+        {
+            log_error("Verification on device failed at index %zu\n", i);
+            return TEST_FAIL;
+        }
+    }
+
+    return CL_SUCCESS;
+}
+
+int test_generic_atomics_variant(cl_device_id deviceID, cl_context context,
+                                 cl_command_queue queue, int)
+{
+    const auto version = get_device_cl_version(deviceID);
+
+    if (version < Version(2, 0)) return TEST_SKIPPED_ITSELF;
+
+    cl_int err = CL_SUCCESS;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &KernelSourceVariant, "testKernel");
+    test_error(err, "Failed to create test kernel");
+
+    size_t wgSize, retSize;
+    // Attempt to find the simd unit size for the device.
+    err = clGetKernelWorkGroupInfo(kernel, deviceID,
+                                   CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
+                                   sizeof(wgSize), &wgSize, &retSize);
+    test_error(err, "clGetKernelWorkGroupInfo failed");
+
+    // How many workgroups to run for the test.
+    const int numWGs = 2;
+    const size_t bufferSize = numWGs * sizeof(cl_uint) * 2;
+    clMemWrapper buffer =
+        clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, nullptr, &err);
+    test_error(err, "clCreateBuffer failed");
+    const cl_int zero = 0;
+    err = clEnqueueFillBuffer(queue, buffer, &zero, sizeof(zero), 0, bufferSize,
+                              0, nullptr, nullptr);
+    test_error(err, "clEnqueueFillBuffer failed");
+
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
+    test_error(err, "clSetKernelArg failed");
+    err = clSetKernelArg(kernel, 1, bufferSize, nullptr);
+    test_error(err, "clSetKernelArg failed");
+
+    const size_t globalSize = wgSize * numWGs;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &globalSize,
+                                 &wgSize, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed");
+
+    std::vector<cl_int> results(numWGs * 2);
+    err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize,
+                              results.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed");
+
+    clFinish(queue);
+
+    const cl_int expected = wgSize / 2;
+    for (size_t i = 0; i < (numWGs * 2); i += 2)
+    {
+        if (results[i] != expected)
+        {
+            log_error("Verification on device failed at index %zu\n", i);
+            return TEST_FAIL;
+        }
+        if (results[i + 1] != expected)
+        {
+            const size_t index = i + 1;
+            log_error("Verification on device failed at index %zu\n", index);
+            return TEST_FAIL;
+        }
+    }
+
+    return CL_SUCCESS;
+}
diff --git a/test_conformance/generic_address_space/main.cpp b/test_conformance/generic_address_space/main.cpp
index 0114758390..a7897367ba 100644
--- a/test_conformance/generic_address_space/main.cpp
+++ b/test_conformance/generic_address_space/main.cpp
@@ -40,31 +40,39 @@ extern int test_generic_advanced_casting(cl_device_id deviceID, cl_context conte
 extern int test_generic_ptr_to_host_mem(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_generic_ptr_to_host_mem_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_max_number_of_params(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+// atomic tests
+int test_generic_atomics_invariant(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+int test_generic_atomics_variant(cl_device_id deviceID, cl_context context,
+                                 cl_command_queue queue, int num_elements);
 
 test_definition test_list[] = {
     // basic tests
-    ADD_TEST( function_get_fence ),
-    ADD_TEST( function_to_address_space ),
-    ADD_TEST( variable_get_fence ),
-    ADD_TEST( variable_to_address_space ),
-    ADD_TEST( casting ),
-    ADD_TEST( conditional_casting ),
-    ADD_TEST( chain_casting ),
-    ADD_TEST( ternary_operator_casting ),
-    ADD_TEST( language_struct ),
-    ADD_TEST( language_union ),
-    ADD_TEST( multiple_calls_same_function ),
-    ADD_TEST( compare_pointers ),
+    ADD_TEST(function_get_fence),
+    ADD_TEST(function_to_address_space),
+    ADD_TEST(variable_get_fence),
+    ADD_TEST(variable_to_address_space),
+    ADD_TEST(casting),
+    ADD_TEST(conditional_casting),
+    ADD_TEST(chain_casting),
+    ADD_TEST(ternary_operator_casting),
+    ADD_TEST(language_struct),
+    ADD_TEST(language_union),
+    ADD_TEST(multiple_calls_same_function),
+    ADD_TEST(compare_pointers),
     // advanced tests
-    ADD_TEST( library_function ),
-    ADD_TEST( generic_variable_volatile ),
-    ADD_TEST( generic_variable_const ),
-    ADD_TEST( generic_variable_gentype ),
-    ADD_TEST( builtin_functions ),
-    ADD_TEST( generic_advanced_casting ),
-    ADD_TEST( generic_ptr_to_host_mem ),
-    ADD_TEST( generic_ptr_to_host_mem_svm ),
-    ADD_TEST( max_number_of_params ),
+    ADD_TEST(library_function),
+    ADD_TEST(generic_variable_volatile),
+    ADD_TEST(generic_variable_const),
+    ADD_TEST(generic_variable_gentype),
+    ADD_TEST(builtin_functions),
+    ADD_TEST(generic_advanced_casting),
+    ADD_TEST(generic_ptr_to_host_mem),
+    ADD_TEST(generic_ptr_to_host_mem_svm),
+    ADD_TEST(max_number_of_params),
+    // atomic tests
+    ADD_TEST(generic_atomics_invariant),
+    ADD_TEST(generic_atomics_variant),
 };
 
 const int test_num = ARRAY_SIZE( test_list );

From 80a02cbd4897e3301477e2d2cc73d7134d7ae466 Mon Sep 17 00:00:00 2001
From: Harald van Dijk <harald.vandijk@codeplay.com>
Date: Tue, 25 Jun 2024 17:42:58 +0100
Subject: [PATCH 03/18] Disable implicit conversion of copysign arguments.
 (#1970)

In binary_float.cpp, copysign is special cased. All the reference
functions there take double arguments, except for reference_copysign
which takes float. This commit copies that approach to special case
copysign in binary_double.cpp as well: all the reference functions there
take long double arguments, but this commit changes reference_copysignl
to take double. The rationale for this in binary_float applies equally
to binary_double: conversions of NAN are not required to preserve its
sign bit. On architectures where conversion of NAN resets the sign bit,
copysign fp64 would return incorrect reference results.
---
 .../math_brute_force/binary_double.cpp        | 25 +++++++++------
 .../math_brute_force/function_list.cpp        | 17 +++++++++-
 .../math_brute_force/function_list.h          |  1 +
 .../math_brute_force/reference_math.cpp       | 32 +++++++++----------
 .../math_brute_force/reference_math.h         |  4 +--
 5 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index cd47c76bb4..feeedc471d 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -219,6 +219,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_double *r;
     cl_double *s;
     cl_double *s2;
+    cl_int copysign_test = 0;
 
     Force64BitFPUPrecision();
 
@@ -377,12 +378,16 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     if (gSkipCorrectnessTesting) return CL_SUCCESS;
 
+    if (!strcmp(name, "copysign")) copysign_test = 1;
+
+#define ref_func(s, s2) (copysign_test ? func.f_ff_d(s, s2) : func.f_ff(s, s2))
+
     // Calculate the correctly rounded reference result
     r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
     s = (cl_double *)gIn + thread_id * buffer_elements;
     s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
     for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
+        r[j] = (cl_double)ref_func(s[j], s2[j]);
 
     // Read the data back -- no need to wait for the first N-1 buffers but wait
     // for the last buffer. This is an in order queue.
@@ -412,7 +417,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
+                long double correct = ref_func(s[j], s2[j]);
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
@@ -449,8 +454,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                         // retry per section 6.5.3.3
                         if (IsDoubleSubnormal(s[j]))
                         {
-                            long double correct2 = func.f_ff(0.0, s2[j]);
-                            long double correct3 = func.f_ff(-0.0, s2[j]);
+                            long double correct2 = ref_func(0.0, s2[j]);
+                            long double correct3 = ref_func(-0.0, s2[j]);
                             float err2 =
                                 Bruteforce_Ulp_Error_Double(test, correct2);
                             float err3 =
@@ -472,10 +477,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                             // try with both args as zero
                             if (IsDoubleSubnormal(s2[j]))
                             {
-                                correct2 = func.f_ff(0.0, 0.0);
-                                correct3 = func.f_ff(-0.0, 0.0);
-                                long double correct4 = func.f_ff(0.0, -0.0);
-                                long double correct5 = func.f_ff(-0.0, -0.0);
+                                correct2 = ref_func(0.0, 0.0);
+                                correct3 = ref_func(-0.0, 0.0);
+                                long double correct4 = ref_func(0.0, -0.0);
+                                long double correct5 = ref_func(-0.0, -0.0);
                                 err2 =
                                     Bruteforce_Ulp_Error_Double(test, correct2);
                                 err3 =
@@ -507,8 +512,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                         }
                         else if (IsDoubleSubnormal(s2[j]))
                         {
-                            long double correct2 = func.f_ff(s[j], 0.0);
-                            long double correct3 = func.f_ff(s[j], -0.0);
+                            long double correct2 = ref_func(s[j], 0.0);
+                            long double correct3 = ref_func(s[j], -0.0);
                             float err2 =
                                 Bruteforce_Ulp_Error_Double(test, correct2);
                             float err3 =
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index bd76ef2cc2..832615e1bc 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -73,6 +73,8 @@
 #define binaryF_two_results_i NULL
 #define mad_function NULL
 
+#define reference_copysignf NULL
+#define reference_copysign NULL
 #define reference_sqrt NULL
 #define reference_sqrtl NULL
 #define reference_divide NULL
@@ -250,7 +252,20 @@ const Func functionList[] = {
     ENTRY(atan2pi, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
     ENTRY(cbrt, 2.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
     ENTRY(ceil, 0.0f, 0.0f, 0.f, FTZ_OFF, unaryF),
-    ENTRY(copysign, 0.0f, 0.0f, 0.f, FTZ_OFF, binaryF),
+    { "copysign",
+      "copysign",
+      { (void*)reference_copysignf },
+      { (void*)reference_copysign },
+      { (void*)reference_copysignf },
+      0.0f,
+      0.0f,
+      0.0f,
+      0.0f,
+      INFINITY,
+      INFINITY,
+      FTZ_OFF,
+      RELAXED_OFF,
+      binaryF },
     ENTRY_EXT(cos, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
     ENTRY(cosh, 4.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
index 71bde9fdb5..56190e334d 100644
--- a/test_conformance/math_brute_force/function_list.h
+++ b/test_conformance/math_brute_force/function_list.h
@@ -52,6 +52,7 @@ union dptr {
     long double (*f_f)(long double);
     long double (*f_u)(cl_ulong);
     int (*i_f)(long double);
+    double (*f_ff_d)(double, double);
     long double (*f_ff)(long double, long double);
     int (*i_ff)(long double, long double);
     long double (*f_fi)(long double, int);
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index c43b03b661..3912fd7973 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -691,7 +691,7 @@ double reference_rootn(double x, int i)
     double sign = x;
     x = reference_fabs(x);
     x = reference_exp2(reference_log2(x) / (double)i);
-    return reference_copysignd(x, sign);
+    return reference_copysign(x, sign);
 }
 
 double reference_rsqrt(double x) { return 1.0 / reference_sqrt(x); }
@@ -707,7 +707,7 @@ double reference_sinpi(double x)
         r = 1 - r;
 
     // sinPi zeros have the same sign as x
-    if (r == 0.0) return reference_copysignd(0.0, x);
+    if (r == 0.0) return reference_copysign(0.0, x);
 
     return reference_sin(r * M_PI);
 }
@@ -717,7 +717,7 @@ double reference_relaxed_sinpi(double x) { return reference_sinpi(x); }
 double reference_tanpi(double x)
 {
     // set aside the sign  (allows us to preserve sign of -0)
-    double sign = reference_copysignd(1.0, x);
+    double sign = reference_copysign(1.0, x);
     double z = reference_fabs(x);
 
     // if big and even  -- caution: only works if x only has single precision
@@ -725,7 +725,7 @@ double reference_tanpi(double x)
     {
         if (z == INFINITY) return x - x; // nan
 
-        return reference_copysignd(
+        return reference_copysign(
             0.0, x); // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
     }
 
@@ -739,7 +739,7 @@ double reference_tanpi(double x)
     if ((i & 1) && z == 0.0) sign = -sign;
 
     // track changes to the sign
-    sign *= reference_copysignd(1.0, z); // really should just be an xor
+    sign *= reference_copysign(1.0, z); // really should just be an xor
     z = reference_fabs(z); // remove the sign again
 
     // reduce once more
@@ -1070,7 +1070,7 @@ int reference_signbit(float x) { return 0 != signbit(x); }
 // Missing functions for win32
 
 
-float reference_copysign(float x, float y)
+float reference_copysignf(float x, float y)
 {
     union {
         float f;
@@ -1084,7 +1084,7 @@ float reference_copysign(float x, float y)
 }
 
 
-double reference_copysignd(double x, double y)
+double reference_copysign(double x, double y)
 {
     union {
         double f;
@@ -1101,10 +1101,10 @@ double reference_copysignd(double x, double y)
 double reference_round(double x)
 {
     double absx = reference_fabs(x);
-    if (absx < 0.5) return reference_copysignd(0.0, x);
+    if (absx < 0.5) return reference_copysign(0.0, x);
 
     if (absx < HEX_DBL(+, 1, 0, +, 53))
-        x = reference_trunc(x + reference_copysignd(0.5, x));
+        x = reference_trunc(x + reference_copysign(0.5, x));
 
     return x;
 }
@@ -1115,7 +1115,7 @@ double reference_trunc(double x)
     {
         cl_long l = (cl_long)x;
 
-        return reference_copysignd((double)l, x);
+        return reference_copysign((double)l, x);
     }
 
     return x;
@@ -1132,16 +1132,16 @@ double reference_trunc(double x)
 
 double reference_cbrt(double x)
 {
-    return reference_copysignd(reference_pow(reference_fabs(x), 1.0 / 3.0), x);
+    return reference_copysign(reference_pow(reference_fabs(x), 1.0 / 3.0), x);
 }
 
 double reference_rint(double x)
 {
     if (reference_fabs(x) < HEX_DBL(+, 1, 0, +, 52))
     {
-        double magic = reference_copysignd(HEX_DBL(+, 1, 0, +, 52), x);
+        double magic = reference_copysign(HEX_DBL(+, 1, 0, +, 52), x);
         double rounded = (x + magic) - magic;
-        x = reference_copysignd(rounded, x);
+        x = reference_copysign(rounded, x);
     }
 
     return x;
@@ -1174,7 +1174,7 @@ double reference_asinh(double x)
     double absx = reference_fabs(x);
     if (absx < HEX_DBL(+, 1, 0, -, 28)) return x;
 
-    double sign = reference_copysignd(1.0, x);
+    double sign = reference_copysign(1.0, x);
 
     if (absx > HEX_DBL(+, 1, 0, +, 28))
         return sign
@@ -1206,7 +1206,7 @@ double reference_atanh(double x)
      */
     if (isnan(x)) return x + x;
 
-    double signed_half = reference_copysignd(0.5, x);
+    double signed_half = reference_copysign(0.5, x);
     x = reference_fabs(x);
     if (x > 1.0) return cl_make_nan();
 
@@ -5333,7 +5333,7 @@ double reference_pow(double x, double y)
     __log2_ep(&hi, &lo, fabsx);
     double prod = y * hi;
     double result = reference_exp2(prod);
-    return isOddInt ? reference_copysignd(result, x) : result;
+    return isOddInt ? reference_copysign(result, x) : result;
 }
 
 double reference_sqrt(double x) { return sqrt(x); }
diff --git a/test_conformance/math_brute_force/reference_math.h b/test_conformance/math_brute_force/reference_math.h
index 03eeacb8ac..aef8d2eaf8 100644
--- a/test_conformance/math_brute_force/reference_math.h
+++ b/test_conformance/math_brute_force/reference_math.h
@@ -88,8 +88,8 @@ double reference_acosh(double x);
 double reference_asinh(double x);
 double reference_atanh(double x);
 double reference_cbrt(double x);
-float reference_copysign(float x, float y);
-double reference_copysignd(double x, double y);
+float reference_copysignf(float x, float y);
+double reference_copysign(double x, double y);
 double reference_exp10(double);
 double reference_exp2(double x);
 double reference_expm1(double x);

From a281046e0e3001a149e23702fd6f14244a095701 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?K=C3=A9vin=20Petit?= <kpet@free.fr>
Date: Tue, 25 Jun 2024 17:44:31 +0100
Subject: [PATCH 04/18] Bump clang-format version to 14 (#1983)

Default version coming with Ubuntu 22.04 that the CI now uses.
---
 .github/workflows/presubmit.yml | 2 +-
 check-format.sh                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index 0da329e18b..26c4af9984 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -63,7 +63,7 @@ jobs:
     runs-on: ubuntu-22.04
     steps:
       - name: Install packages
-        run: sudo apt install -y clang-format clang-format-11
+        run: sudo apt install -y clang-format clang-format-14
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
diff --git a/check-format.sh b/check-format.sh
index b5dc0a72ca..b76117c15c 100755
--- a/check-format.sh
+++ b/check-format.sh
@@ -2,7 +2,7 @@
 
 # Arg used to specify non-'origin/main' comparison branch
 ORIGIN_BRANCH=${1:-"origin/main"}
-CLANG_BINARY=${2:-"`which clang-format-11`"}
+CLANG_BINARY=${2:-"`which clang-format-14`"}
 
 # Run git-clang-format to check for violations
 CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY)

From ac6931198d51c16a83b3aeb639c695fcda48094f Mon Sep 17 00:00:00 2001
From: paulfradgley <39525348+paulfradgley@users.noreply.github.com>
Date: Tue, 25 Jun 2024 17:46:49 +0100
Subject: [PATCH 05/18] Fixed incorrect usage of clGetDeviceIDs num_devices
 (#1952)

clGetDeviceIDs 'num_devices' output parameter is described as:
> num_devices returns the number of OpenCL devices available that match
device_type.

but the _test_events
out_of_order_event_waitlist_multi_queue_multi_device_ test expects that
after calling:
`clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids,
&number_returned);`

the content of number_returned needs to be 2, but it should be valid to
return a larger number.

---------

Co-authored-by: Ben Ashbaugh <ben.ashbaugh@intel.com>
---
 test_conformance/events/test_event_dependencies.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp
index 4efe1a6562..3cc183bd82 100644
--- a/test_conformance/events/test_event_dependencies.cpp
+++ b/test_conformance/events/test_event_dependencies.cpp
@@ -97,7 +97,7 @@ int test_event_enqueue_wait_for_events_run_test(
         error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids,
                                &number_returned);
         test_error(error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed.");
-        if (number_returned != 2)
+        if (number_returned < 2)
         {
             log_info("Failed to obtain two devices. Test can not run.\n");
             free(two_device_ids);

From c7b682f12af7a263ccf661eaef9bbb220663004d Mon Sep 17 00:00:00 2001
From: Harald van Dijk <harald.vandijk@codeplay.com>
Date: Tue, 25 Jun 2024 17:47:55 +0100
Subject: [PATCH 06/18] spirv_new: fix test_decorate. (#1980)

test_decorate was checking for half-precision NAN incorrectly, calling
cl_half_from_float where cl_half_to_float was intended, causing a wrong
expected result.

test_decorate was also printing the expected and actual results
reversed, printing "got -1, want 0", when our implementation already
returned 0.
---
 test_conformance/spirv_new/test_decorate.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_conformance/spirv_new/test_decorate.cpp b/test_conformance/spirv_new/test_decorate.cpp
index 4c2f82b540..3a1f422aff 100644
--- a/test_conformance/spirv_new/test_decorate.cpp
+++ b/test_conformance/spirv_new/test_decorate.cpp
@@ -229,7 +229,7 @@ static inline To compute_saturated_output(Ti lhs, Ti rhs)
         f = cl_half_to_float(cl_half_from_float(f, CL_HALF_RTE));
 
         To val = (To)std::min<float>(std::max<float>(f, loVal), hiVal);
-        if (isnan(cl_half_from_float(rhs, CL_HALF_RTE)))
+        if (isnan(cl_half_to_float(rhs)))
         {
             val = 0;
         }
@@ -309,7 +309,7 @@ int verify_saturated_results(cl_device_id deviceID, cl_context context,
 
         if (val != h_res[i])
         {
-            log_error("Value error at %d: got %d, want %d\n", i, val, h_res[i]);
+            log_error("Value error at %d: got %d, want %d\n", i, h_res[i], val);
             return -1;
         }
     }

From fcbccab4d1970479df338c4d1809378cb3ea3749 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Thu, 27 Jun 2024 09:46:33 +0200
Subject: [PATCH 07/18] [NFC] math_brute_force: drop unneeded gotos (#1843)

Simplify code by returning directly instead of using goto statements.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 .../math_brute_force/i_unary_double.cpp            | 14 ++++++--------
 .../math_brute_force/i_unary_float.cpp             | 14 ++++++--------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index 953c33bbb4..2ed8087441 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -98,7 +98,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                     vlog_error(
                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                         error, j);
-                    goto exit;
+                    return error;
                 }
             }
             else
@@ -124,13 +124,13 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
             if ((error = clSetKernelArg(kernels[j][thread_id], 1,
                                         sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
@@ -138,7 +138,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                                                 NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -159,7 +159,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -188,8 +188,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                         "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n",
                         f->name, sizeNames[k], err, ((double *)gIn)[j], t[j],
                         q[j]);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -221,6 +220,5 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
     return error;
 }
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index 0ce37cc8cc..0df35c4add 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -97,7 +97,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                     vlog_error(
                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                         error, j);
-                    goto exit;
+                    return error;
                 }
             }
             else
@@ -123,13 +123,13 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
             if ((error = clSetKernelArg(kernels[j][thread_id], 1,
                                         sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
@@ -137,7 +137,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                                                 NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -158,7 +158,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -187,8 +187,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                                "*%d vs. %d\n",
                                f->name, sizeNames[k], err, ((float *)gIn)[j],
                                ((cl_uint *)gIn)[j], t[j], q[j]);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -219,6 +218,5 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
     return error;
 }

From 340b7c956a093cfe42e56ecc4e893ddb33877798 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 2 Jul 2024 17:43:52 +0200
Subject: [PATCH 08/18] Added new cl_khr_semaphore tests to verify
 clEnqueueWaitSemaphoresKHR negative results (#1965)

According to work plan from
https://github.com/KhronosGroup/OpenCL-CTS/issues/1691

After consultations with @bashbaug I skipped this case:

`CL_INVALID_VALUE if any of the semaphore objects specified by
sema_objects requires a semaphore payload and sema_payload_list is NULL`
---
 .../cl_khr_semaphore/CMakeLists.txt           |   2 +
 .../extensions/cl_khr_semaphore/main.cpp      |   9 +
 .../extensions/cl_khr_semaphore/procs.h       |  20 +
 .../cl_khr_semaphore/semaphore_base.h         | 202 +++++++++
 .../test_semaphores_negative_wait.cpp         | 395 ++++++++++++++++++
 5 files changed, 628 insertions(+)
 create mode 100644 test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
 create mode 100644 test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_wait.cpp

diff --git a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
index 824784a135..5618ebd640 100644
--- a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
@@ -3,6 +3,8 @@ set(MODULE_NAME CL_KHR_SEMAPHORE)
 set(${MODULE_NAME}_SOURCES
          main.cpp
          test_semaphores.cpp
+         test_semaphores_negative_wait.cpp
+         semaphore_base.h
 )
 
 include(../../CMakeCommon.txt)
diff --git a/test_conformance/extensions/cl_khr_semaphore/main.cpp b/test_conformance/extensions/cl_khr_semaphore/main.cpp
index 0ae7206a0d..dc360ab6b9 100644
--- a/test_conformance/extensions/cl_khr_semaphore/main.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/main.cpp
@@ -35,6 +35,15 @@ test_definition test_list[] = {
     ADD_TEST_VERSION(semaphores_multi_wait, Version(1, 2)),
     ADD_TEST_VERSION(semaphores_queries, Version(1, 2)),
     ADD_TEST_VERSION(semaphores_import_export_fd, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_command_queue,
+                     Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_value, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_semaphore, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_context, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_event_wait_list,
+                     Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_event_status,
+                     Version(1, 2)),
 };
 
 const int test_num = ARRAY_SIZE(test_list);
diff --git a/test_conformance/extensions/cl_khr_semaphore/procs.h b/test_conformance/extensions/cl_khr_semaphore/procs.h
index f7c1aaa301..9fb174583a 100644
--- a/test_conformance/extensions/cl_khr_semaphore/procs.h
+++ b/test_conformance/extensions/cl_khr_semaphore/procs.h
@@ -45,3 +45,23 @@ extern int test_semaphores_import_export_fd(cl_device_id deviceID,
                                             cl_context context,
                                             cl_command_queue queue,
                                             int num_elements);
+extern int test_semaphores_negative_wait_invalid_command_queue(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_wait_invalid_value(cl_device_id device,
+                                                       cl_context context,
+                                                       cl_command_queue queue,
+                                                       int num_elements);
+extern int test_semaphores_negative_wait_invalid_semaphore(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_wait_invalid_context(cl_device_id device,
+                                                         cl_context context,
+                                                         cl_command_queue queue,
+                                                         int num_elements);
+extern int test_semaphores_negative_wait_invalid_event_wait_list(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_wait_invalid_event_status(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
diff --git a/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h b/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
new file mode 100644
index 0000000000..e50f33aedd
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
@@ -0,0 +1,202 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CL_KHR_SEMAPHORE_BASE_H
+#define CL_KHR_SEMAPHORE_BASE_H
+
+#include <CL/cl_ext.h>
+#include "harness/deviceInfo.h"
+#include "harness/testHarness.h"
+
+#include "harness/typeWrappers.h"
+
+struct SemaphoreBase
+{
+    SemaphoreBase(cl_device_id device): device(device) {}
+
+    cl_int init_extension_functions()
+    {
+        cl_platform_id platform;
+        cl_int error =
+            clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id),
+                            &platform, nullptr);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+
+        // If it is supported get the addresses of all the APIs here.
+        // clang-format off
+#define GET_EXTENSION_ADDRESS(FUNC)                                            \
+        FUNC = reinterpret_cast<FUNC##_fn>(                                    \
+            clGetExtensionFunctionAddressForPlatform(platform, #FUNC));        \
+        if (FUNC == nullptr)                                                   \
+        {                                                                      \
+            log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
+                      " with " #FUNC "\n");                                    \
+            return TEST_FAIL;                                                  \
+        }
+        // clang-format on
+
+        GET_EXTENSION_ADDRESS(clCreateSemaphoreWithPropertiesKHR);
+        GET_EXTENSION_ADDRESS(clEnqueueSignalSemaphoresKHR);
+        GET_EXTENSION_ADDRESS(clEnqueueWaitSemaphoresKHR);
+        GET_EXTENSION_ADDRESS(clReleaseSemaphoreKHR);
+        GET_EXTENSION_ADDRESS(clGetSemaphoreInfoKHR);
+        GET_EXTENSION_ADDRESS(clRetainSemaphoreKHR);
+        GET_EXTENSION_ADDRESS(clGetSemaphoreHandleForTypeKHR);
+
+#undef GET_EXTENSION_ADDRESS
+        return CL_SUCCESS;
+    }
+
+    clCreateSemaphoreWithPropertiesKHR_fn clCreateSemaphoreWithPropertiesKHR =
+        nullptr;
+    clEnqueueSignalSemaphoresKHR_fn clEnqueueSignalSemaphoresKHR = nullptr;
+    clEnqueueWaitSemaphoresKHR_fn clEnqueueWaitSemaphoresKHR = nullptr;
+    clReleaseSemaphoreKHR_fn clReleaseSemaphoreKHR = nullptr;
+    clGetSemaphoreInfoKHR_fn clGetSemaphoreInfoKHR = nullptr;
+    clRetainSemaphoreKHR_fn clRetainSemaphoreKHR = nullptr;
+    clGetSemaphoreHandleForTypeKHR_fn clGetSemaphoreHandleForTypeKHR = nullptr;
+
+    cl_device_id device = nullptr;
+};
+
+// Wrapper class based off generic typeWrappers.h wrappers. However, because
+// the release/retain functions are queried at runtime from the platform,
+// rather than known at compile time we cannot link the instantiated template.
+// Instead, pass an instance of `SemaphoreTestBase` on wrapper construction
+// to access the release/retain functions.
+class clSemaphoreWrapper {
+    cl_semaphore_khr object = nullptr;
+
+    void retain()
+    {
+        if (!object) return;
+
+        auto err = base->clRetainSemaphoreKHR(object);
+        if (err != CL_SUCCESS)
+        {
+            print_error(err, "clRetainCommandBufferKHR() failed");
+            std::abort();
+        }
+    }
+
+    void release()
+    {
+        if (!object) return;
+
+        auto err = base->clReleaseSemaphoreKHR(object);
+        if (err != CL_SUCCESS)
+        {
+            print_error(err, "clReleaseCommandBufferKHR() failed");
+            std::abort();
+        }
+    }
+
+    // Used to access release/retain functions
+    SemaphoreBase *base;
+
+public:
+    // We always want to have base available to dereference
+    clSemaphoreWrapper() = delete;
+
+    clSemaphoreWrapper(SemaphoreBase *base): base(base) {}
+
+    // On assignment, assume the object has a refcount of one.
+    clSemaphoreWrapper &operator=(cl_semaphore_khr rhs)
+    {
+        reset(rhs);
+        return *this;
+    }
+
+    // Copy semantics, increase retain count.
+    clSemaphoreWrapper(clSemaphoreWrapper const &w) { *this = w; }
+    clSemaphoreWrapper &operator=(clSemaphoreWrapper const &w)
+    {
+        reset(w.object);
+        retain();
+        return *this;
+    }
+
+    // Move semantics, directly take ownership.
+    clSemaphoreWrapper(clSemaphoreWrapper &&w) { *this = std::move(w); }
+    clSemaphoreWrapper &operator=(clSemaphoreWrapper &&w)
+    {
+        reset(w.object);
+        w.object = nullptr;
+        return *this;
+    }
+
+    ~clSemaphoreWrapper() { reset(); }
+
+    // Release the existing object, if any, and own the new one, if any.
+    void reset(cl_semaphore_khr new_object = nullptr)
+    {
+        release();
+        object = new_object;
+    }
+
+    operator cl_semaphore_khr() const { return object; }
+    operator const cl_semaphore_khr *() { return &object; }
+};
+
+struct SemaphoreTestBase : public SemaphoreBase
+{
+    SemaphoreTestBase(cl_device_id device, cl_context context,
+                      cl_command_queue queue)
+        : SemaphoreBase(device), context(context), semaphore(this)
+    {
+        cl_int error = init_extension_functions();
+        if (error != CL_SUCCESS)
+            throw std::runtime_error("init_extension_functions failed\n");
+
+        error = clRetainCommandQueue(queue);
+        if (error != CL_SUCCESS)
+            throw std::runtime_error("clRetainCommandQueue failed\n");
+        this->queue = queue;
+    }
+
+    virtual cl_int Run() = 0;
+
+protected:
+    cl_context context = nullptr;
+    clCommandQueueWrapper queue = nullptr;
+    clSemaphoreWrapper semaphore = nullptr;
+};
+
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue)
+{
+    if (!is_extension_available(device, "cl_khr_semaphore"))
+    {
+        log_info(
+            "Device does not support 'cl_khr_semaphore'. Skipping the test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    cl_int status = TEST_PASS;
+    try
+    {
+        auto test_fixture = T(device, context, queue);
+        status = test_fixture.Run();
+    } catch (const std::runtime_error &e)
+    {
+        log_error("%s", e.what());
+        return TEST_FAIL;
+    }
+
+    return status;
+}
+
+#endif // CL_KHR_SEMAPHORE_BASE_H
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_wait.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_wait.cpp
new file mode 100644
index 0000000000..dab28d96cc
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_wait.cpp
@@ -0,0 +1,395 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "semaphore_base.h"
+
+#include "harness/errorHelpers.h"
+#include <chrono>
+#include <system_error>
+#include <thread>
+#include <vector>
+
+namespace {
+
+// the device associated with command_queue is not same as one of the devices
+// specified by CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR at the time of creating one
+// or more of sema_objects.
+
+struct WaitInvalidCommandQueue : public SemaphoreTestBase
+{
+    WaitInvalidCommandQueue(cl_device_id device, cl_context context,
+                            cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create semaphore
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR),
+            (cl_semaphore_properties_khr)device,
+            CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR,
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        // find other device
+        cl_platform_id platform_id = 0;
+        // find out what platform the harness is using.
+        err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
+                              sizeof(cl_platform_id), &platform_id, nullptr);
+        test_error(err, "clGetDeviceInfo failed");
+
+        cl_uint num_platforms = 0;
+        err = clGetPlatformIDs(16, nullptr, &num_platforms);
+        test_error(err, "clGetPlatformIDs failed");
+
+        std::vector<cl_platform_id> platforms(num_platforms);
+
+        err = clGetPlatformIDs(num_platforms, platforms.data(), &num_platforms);
+        test_error(err, "clGetPlatformIDs failed");
+
+        cl_device_id device_sec = nullptr;
+        cl_uint num_devices = 0;
+        for (int p = 0; p < (int)num_platforms; p++)
+        {
+            if (platform_id == platforms[p]) continue;
+
+            err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr,
+                                 &num_devices);
+            test_error(err, "clGetDeviceIDs failed");
+
+            std::vector<cl_device_id> devices(num_devices);
+            err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices,
+                                 devices.data(), nullptr);
+            test_error(err, "clGetDeviceIDs failed");
+
+            device_sec = devices.front();
+            break;
+        }
+
+        if (device_sec == nullptr)
+        {
+            log_info("Can't find needed resources. Skipping the test.\n");
+            return TEST_SKIPPED_ITSELF;
+        }
+
+        // Create secondary context
+        clContextWrapper context_sec =
+            clCreateContext(0, 1, &device_sec, nullptr, nullptr, &err);
+        test_error(err, "Failed to create context");
+
+        // Create secondary queue
+        clCommandQueueWrapper queue_sec =
+            clCreateCommandQueue(context_sec, device_sec, 0, &err);
+        test_error(err, "Could not create command queue");
+
+        // Signal semaphore
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                           nullptr, nullptr);
+        test_error(err, "Could not signal semaphore");
+
+        // Wait semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue_sec, 1, semaphore, nullptr, 0,
+                                         nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_COMMAND_QUEUE,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return TEST_PASS;
+    }
+};
+
+
+// num_sema_objects is 0.
+
+struct WaitInvalidValue : public SemaphoreTestBase
+{
+    WaitInvalidValue(cl_device_id device, cl_context context,
+                     cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Wait semaphore
+        cl_int err = CL_SUCCESS;
+        err = clEnqueueWaitSemaphoresKHR(queue, 0, semaphore, nullptr, 0,
+                                         nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_VALUE,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+// any of the semaphore objects specified by sema_objects is not valid.
+
+struct WaitInvalidSemaphore : public SemaphoreTestBase
+{
+    WaitInvalidSemaphore(cl_device_id device, cl_context context,
+                         cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Wait semaphore
+        cl_semaphore_khr sema_objects[] = { nullptr, nullptr, nullptr };
+        cl_int err = CL_SUCCESS;
+        err = clEnqueueWaitSemaphoresKHR(
+            queue, sizeof(sema_objects) / sizeof(sema_objects[0]), sema_objects,
+            nullptr, 0, nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_SEMAPHORE_KHR,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+// 1) the context associated with command_queue and any of the semaphore objects
+// in sema_objects are not the same, or
+// 2) the context associated with command_queue and that associated with events
+// in event_wait_list are not the same.
+
+struct WaitInvalidContext : public SemaphoreTestBase
+{
+    WaitInvalidContext(cl_device_id device, cl_context context,
+                       cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create semaphore
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        // Create secondary context
+        clContextWrapper context_sec =
+            clCreateContext(0, 1, &device, nullptr, nullptr, &err);
+        test_error(err, "Failed to create context");
+
+        // Create secondary queue
+        clCommandQueueWrapper queue_sec =
+            clCreateCommandQueue(context_sec, device, 0, &err);
+        test_error(err, "Could not create command queue");
+
+        // Signal semaphore
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                           nullptr, nullptr);
+        test_error(err, "Could not signal semaphore");
+
+        // (1) Wait semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue_sec, 1, semaphore, nullptr, 0,
+                                         nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_CONTEXT,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        // Create user event
+        clEventWrapper user_event = clCreateUserEvent(context_sec, &err);
+        test_error(err, "Could not create user event");
+
+        // (2) Wait semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 1,
+                                         &user_event, nullptr);
+
+        cl_int signal_error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(signal_error, "clSetUserEventStatus failed");
+
+        test_failure_error(err, CL_INVALID_CONTEXT,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return TEST_PASS;
+    }
+};
+
+// (1) event_wait_list is NULL and num_events_in_wait_list is not 0, or
+// (2) event_wait_list is not NULL and num_events_in_wait_list is 0, or
+// (3) event objects in event_wait_list are not valid events.
+
+struct WaitInvalidEventWaitList : public SemaphoreTestBase
+{
+    WaitInvalidEventWaitList(cl_device_id device, cl_context context,
+                             cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create semaphore
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+
+        // Signal semaphore
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                           nullptr, nullptr);
+        test_error(err, "Could not signal semaphore");
+
+        // (1) Wait semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 1,
+                                         nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_EVENT_WAIT_LIST,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        // Create user event
+        clEventWrapper user_event = clCreateUserEvent(context, &err);
+        test_error(err, "Could not create user event");
+
+        // (2) Wait semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                         &user_event, nullptr);
+
+        cl_int signal_error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(signal_error, "clSetUserEventStatus failed");
+
+        test_failure_error(err, CL_INVALID_EVENT_WAIT_LIST,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        // (3) Wait semaphore
+        cl_event wait_list[] = { nullptr, nullptr, nullptr };
+        err = clEnqueueWaitSemaphoresKHR(
+            queue, 1, semaphore, nullptr,
+            sizeof(wait_list) / sizeof(wait_list[0]), wait_list, nullptr);
+        test_failure_error(err, CL_INVALID_EVENT_WAIT_LIST,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+// the execution status of any of the events in event_wait_list is a negative
+// integer value.
+
+struct WaitInvalidEventStatus : public SemaphoreTestBase
+{
+    WaitInvalidEventStatus(cl_device_id device, cl_context context,
+                           cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create semaphore
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        // Signal semaphore
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                           nullptr, nullptr);
+        test_error(err, "Could not signal semaphore");
+
+        // Create user event
+        clEventWrapper user_event = clCreateUserEvent(context, &err);
+        test_error(err, "Could not create user event");
+
+        // Now release the user event, which will allow our actual action to run
+        err = clSetUserEventStatus(user_event, -1);
+        test_error(err, "Unable to set event status");
+
+        // Wait semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 1,
+                                         &user_event, nullptr);
+        test_failure_error(err, CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+}
+
+int test_semaphores_negative_wait_invalid_command_queue(cl_device_id device,
+                                                        cl_context context,
+                                                        cl_command_queue queue,
+                                                        int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidCommandQueue>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_value(cl_device_id device,
+                                                cl_context context,
+                                                cl_command_queue queue,
+                                                int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidValue>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_semaphore(cl_device_id device,
+                                                    cl_context context,
+                                                    cl_command_queue queue,
+                                                    int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidSemaphore>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_context(cl_device_id device,
+                                                  cl_context context,
+                                                  cl_command_queue queue,
+                                                  int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidContext>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_event_wait_list(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidEventWaitList>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_event_status(cl_device_id device,
+                                                       cl_context context,
+                                                       cl_command_queue queue,
+                                                       int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidEventStatus>(device, context, queue);
+}

From 1cd0266ca142cb1a93bf3b1be00f1eb438a6dd00 Mon Sep 17 00:00:00 2001
From: Chuang-Yu Cheng <cycheng.buddhist@gmail.com>
Date: Wed, 3 Jul 2024 01:29:00 +0900
Subject: [PATCH 09/18] spirv_new: fix test_decorate to use the device's
 default rounding (#1987)

The verification code assumes the hardware uses CL_HALF_RTE, which
causes a mismatch computation results when the hardware uses RTZ. Fix to
use the hardware's default rounding mode.
---
 test_conformance/spirv_new/test_decorate.cpp | 34 ++++++++++++++++++--
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/test_conformance/spirv_new/test_decorate.cpp b/test_conformance/spirv_new/test_decorate.cpp
index 3a1f422aff..b85419300d 100644
--- a/test_conformance/spirv_new/test_decorate.cpp
+++ b/test_conformance/spirv_new/test_decorate.cpp
@@ -216,7 +216,8 @@ static inline Ti generate_saturated_rhs_input(RandomSeed &seed)
 }
 
 template <typename Ti, typename Tl, typename To>
-static inline To compute_saturated_output(Ti lhs, Ti rhs)
+static inline To compute_saturated_output(Ti lhs, Ti rhs,
+                                          cl_half_rounding_mode half_rounding)
 {
     constexpr auto loVal = std::numeric_limits<To>::min();
     constexpr auto hiVal = std::numeric_limits<To>::max();
@@ -226,7 +227,7 @@ static inline To compute_saturated_output(Ti lhs, Ti rhs)
         cl_float f = cl_half_to_float(lhs) * cl_half_to_float(rhs);
 
         // Quantize to fp16:
-        f = cl_half_to_float(cl_half_from_float(f, CL_HALF_RTE));
+        f = cl_half_to_float(cl_half_from_float(f, half_rounding));
 
         To val = (To)std::min<float>(std::max<float>(f, loVal), hiVal);
         if (isnan(cl_half_to_float(rhs)))
@@ -246,6 +247,26 @@ static inline To compute_saturated_output(Ti lhs, Ti rhs)
     return val;
 }
 
+static cl_half_rounding_mode get_half_rounding_mode(cl_device_id deviceID)
+{
+    const cl_device_fp_config fpConfigHalf =
+        get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG);
+
+    if (fpConfigHalf == CL_FP_ROUND_TO_NEAREST)
+    {
+        return CL_HALF_RTE;
+    }
+    else if (fpConfigHalf == CL_FP_ROUND_TO_ZERO)
+    {
+        return CL_HALF_RTZ;
+    }
+    else
+    {
+        log_error("Error while acquiring half rounding mode");
+    }
+    return CL_HALF_RTE;
+}
+
 template <typename Ti, typename Tl, typename To>
 int verify_saturated_results(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, const char *kname,
@@ -303,9 +324,16 @@ int verify_saturated_results(cl_device_id deviceID, cl_context context,
     err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, out_bytes, &h_res[0], 0, NULL, NULL);
     SPIRV_CHECK_ERROR(err, "Failed to read to output");
 
+    cl_half_rounding_mode half_rounding = CL_HALF_RTE;
+    if (std::is_same<Ti, cl_half>::value)
+    {
+        half_rounding = get_half_rounding_mode(deviceID);
+    }
+
     for (int i = 0; i < num; i++)
     {
-        To val = compute_saturated_output<Ti, Tl, To>(h_lhs[i], h_rhs[i]);
+        To val = compute_saturated_output<Ti, Tl, To>(h_lhs[i], h_rhs[i],
+                                                      half_rounding);
 
         if (val != h_res[i])
         {

From 89923f80047f587b4f9e6b6aa8f7ac08ee3f5aba Mon Sep 17 00:00:00 2001
From: Karol Herbst <kherbst@redhat.com>
Date: Tue, 2 Jul 2024 18:30:03 +0200
Subject: [PATCH 10/18] Extend printf tests with more %% corner cases (#1986)

We've had a couple of bugs inside mesa/rusticl processing %% correctly.
I've added those cases locally to make sure all corner cases are
properly handled.
---
 test_conformance/printf/util_printf.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index ca260573bb..29b7f8dc04 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -724,6 +724,12 @@ std::vector<printDataGenParameters> printStringGenParameters = {
 
     { {"%s"}, "\"%%\"" },
 
+    { {"%s"}, "\"foo%%bar%%bar%%foo\"" },
+
+    { {"%%%s%%"}, "\"foo\"" },
+
+    { {"%%s%s"}, "\"foo\"" },
+
     // special symbols
     // nested
 
@@ -764,6 +770,12 @@ std::vector<std::string> correctBufferString = {
 
     "%%",
 
+    "foo%%bar%%bar%%foo",
+
+    "%foo%",
+
+    "%sfoo",
+
     "\"%%\"",
 
     "\'%%\'",
@@ -819,6 +831,8 @@ std::vector<printDataGenParameters> printFormatStringGenParameters = {
 
     { {"\'%%\'"} },
 
+    { {"\'foo%%bar%%bar%%foo\'"} },
+
     // tabs
 
     { {"foo\\t\\t\\tfoo"} },
@@ -849,6 +863,8 @@ std::vector<std::string> correctBufferFormatString = {
 
     "\'%\'",
 
+    "\'foo%bar%bar%foo\'",
+
     "foo\t\t\tfoo",
 
 R"(foo

From 769984b02380087a7cb2f0330928784a5b20f178 Mon Sep 17 00:00:00 2001
From: Michal Babej <franz@users.noreply.github.com>
Date: Tue, 2 Jul 2024 19:32:12 +0300
Subject: [PATCH 11/18] fix bugs in negative command_buffer tests (#1969)

- when calling command buffer APIs, test with `command_queue != NULL`
should return `CL_INVALID_VALUE` only if the device doesn't support
`cl_khr_command_buffer_multi_device` (added `Skip`)

- some tests enqueued commands with multiple invalid arguments, e.g.
`clCommandCopyImageToBufferKHR` with two images and invalid sync points.
AFAIK the order of argument checking is not defined, so implementation
can return any valid error value for such API calls, but the tests
assumed only one particular error would be returned. Fix the API calls
to be unambiguous.
---
 .../negative_command_buffer_barrier.cpp       |  7 ++++
 .../negative_command_buffer_copy_image.cpp    | 41 +++++++++++--------
 .../negative_command_buffer_svm_mem.cpp       |  7 ++++
 .../negative_command_nd_range_kernel.cpp      | 11 ++++-
 4 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_barrier.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_barrier.cpp
index 14f828cd5d..6e682aa381 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_barrier.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_barrier.cpp
@@ -37,6 +37,13 @@ struct CommandBufferBarrierNotNullQueue : public BasicCommandBufferTest
 
         return CL_SUCCESS;
     }
+
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+        return is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
 };
 
 // CL_INVALID_COMMAND_BUFFER_KHR if command_buffer is not a valid
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy_image.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy_image.cpp
index 843c0d54e1..80bb3b0245 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy_image.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy_image.cpp
@@ -28,15 +28,20 @@ struct CommandCopyBaseTest : BasicCommandBufferTest
 
     cl_int SetUp(int elements) override
     {
+        num_elements = elements;
+        origin[0] = origin[1] = origin[2] = 0;
+        region[0] = elements / 64;
+        region[1] = 64;
+        region[2] = 1;
         cl_int error = BasicCommandBufferTest::SetUp(elements);
         test_error(error, "BasicCommandBufferTest::SetUp failed");
 
-        src_image = create_image_2d(context, CL_MEM_READ_ONLY, &formats, 512,
-                                    512, 0, NULL, &error);
+        src_image = create_image_2d(context, CL_MEM_READ_ONLY, &formats,
+                                    elements / 64, 64, 0, NULL, &error);
         test_error(error, "create_image_2d failed");
 
-        dst_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &formats, 512,
-                                    512, 0, NULL, &error);
+        dst_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &formats,
+                                    elements / 64, 64, 0, NULL, &error);
         test_error(error, "create_image_2d failed");
 
         return CL_SUCCESS;
@@ -58,8 +63,8 @@ struct CommandCopyBaseTest : BasicCommandBufferTest
     clMemWrapper src_image;
     clMemWrapper dst_image;
     const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
-    const size_t origin[3] = { 0, 0, 0 };
-    const size_t region[3] = { 512, 512, 1 };
+    size_t origin[3];
+    size_t region[3];
 };
 
 namespace {
@@ -81,7 +86,7 @@ struct CommandBufferCopyImageQueueNotNull : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(command_buffer, queue, src_image,
-                                              dst_image, origin, region, 0, 0,
+                                              out_mem, origin, region, 0, 0,
                                               nullptr, nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_COMMAND_QUEUE,
@@ -119,8 +124,8 @@ struct CommandBufferCopyImageContextNotSame : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(
-            command_buffer, nullptr, src_image_ctx, dst_image, origin, region,
-            0, 0, nullptr, nullptr, nullptr);
+            command_buffer, nullptr, src_image_ctx, out_mem, origin, region, 0,
+            0, nullptr, nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_CONTEXT,
                                "clCommandCopyImageToBufferKHR should return "
@@ -159,7 +164,7 @@ struct CommandBufferCopyImageContextNotSame : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(
-            command_buffer, nullptr, src_image, dst_image, origin, region, 0, 0,
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 0,
             nullptr, nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_CONTEXT,
@@ -179,11 +184,11 @@ struct CommandBufferCopyImageContextNotSame : public CommandCopyBaseTest
         test_error(error, "Failed to create context");
 
         src_image_ctx = create_image_2d(context1, CL_MEM_READ_ONLY, &formats,
-                                        512, 512, 0, NULL, &error);
+                                        elements / 64, 64, 0, NULL, &error);
         test_error(error, "create_image_2d failed");
 
         dst_image_ctx = create_image_2d(context1, CL_MEM_WRITE_ONLY, &formats,
-                                        512, 512, 0, NULL, &error);
+                                        elements / 64, 64, 0, NULL, &error);
         test_error(error, "create_image_2d failed");
 
         queue1 = clCreateCommandQueue(context1, device, 0, &error);
@@ -220,7 +225,7 @@ struct CommandBufferCopySyncPointsNullOrNumZero : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(
-            command_buffer, nullptr, src_image, dst_image, origin, region, 0, 1,
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 1,
             &invalid_point, nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
@@ -239,7 +244,7 @@ struct CommandBufferCopySyncPointsNullOrNumZero : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(
-            command_buffer, nullptr, src_image, dst_image, origin, region, 0, 1,
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 1,
             nullptr, nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
@@ -263,7 +268,7 @@ struct CommandBufferCopySyncPointsNullOrNumZero : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(
-            command_buffer, nullptr, src_image, dst_image, origin, region, 0, 0,
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 0,
             &point, nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
@@ -294,7 +299,7 @@ struct CommandBufferCopyImageInvalidCommandBuffer : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(nullptr, nullptr, src_image,
-                                              dst_image, origin, region, 0, 0,
+                                              out_mem, origin, region, 0, 0,
                                               nullptr, nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_COMMAND_BUFFER_KHR,
@@ -327,7 +332,7 @@ struct CommandBufferCopyImageFinalizedCommandBuffer : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(
-            command_buffer, nullptr, src_image, dst_image, origin, region, 0, 0,
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 0,
             nullptr, nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_OPERATION,
@@ -358,7 +363,7 @@ struct CommandBufferCopyImageMutableHandleNotNull : public CommandCopyBaseTest
                                TEST_FAIL);
 
         error = clCommandCopyImageToBufferKHR(
-            command_buffer, nullptr, src_image, dst_image, origin, region, 0, 0,
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 0,
             nullptr, nullptr, &mutable_handle);
 
         test_failure_error_ret(error, CL_INVALID_VALUE,
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_svm_mem.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_svm_mem.cpp
index 4d4e2cfb77..b5d2355b2a 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_svm_mem.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_svm_mem.cpp
@@ -50,6 +50,13 @@ struct CommandBufferCommandSVMQueueNotNull : public BasicSVMCommandBufferTest
     }
 
     const cl_char pattern_1 = 0x14;
+
+    bool Skip() override
+    {
+        if (BasicSVMCommandBufferTest::Skip()) return true;
+        return is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
 };
 
 // CL_INVALID_SYNC_POINT_WAIT_LIST_KHR if sync_point_wait_list is NULL and
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_nd_range_kernel.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_nd_range_kernel.cpp
index e44e38c09c..ccbefd85f8 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_nd_range_kernel.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_nd_range_kernel.cpp
@@ -38,6 +38,13 @@ struct CommandNDRangeKernelQueueNotNull : public BasicCommandBufferTest
 
         return CL_SUCCESS;
     }
+
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+        return is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
 };
 
 // CL_INVALID_CONTEXT if the context associated with command_queue,
@@ -108,7 +115,7 @@ struct CommandNDRangeKerneSyncPointsNullOrNumZero
         cl_sync_point_khr invalid_point = 0;
         cl_sync_point_khr* invalid_sync_points[] = { &invalid_point };
         cl_int error = clCommandNDRangeKernelKHR(
-            command_buffer, nullptr, nullptr, kernel, 0, nullptr, &num_elements,
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
             nullptr, 1, invalid_sync_points[0], nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
@@ -134,7 +141,7 @@ struct CommandNDRangeKerneSyncPointsNullOrNumZero
 
         cl_sync_point_khr* sync_points[] = { &point };
         error = clCommandNDRangeKernelKHR(
-            command_buffer, nullptr, nullptr, kernel, 0, nullptr, &num_elements,
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
             nullptr, 0, sync_points[0], nullptr, nullptr);
 
         test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,

From 02471c8f56d75ef8e46c20a3dd00503faddf7a4e Mon Sep 17 00:00:00 2001
From: Julia Jiang <56359287+jujiang-del@users.noreply.github.com>
Date: Tue, 2 Jul 2024 12:34:53 -0400
Subject: [PATCH 12/18] =?UTF-8?q?Fix=20build=20errors=20related=20with=20v?=
 =?UTF-8?q?ariable=20defined=20array=20length=20and=20gl=20te=E2=80=A6=20(?=
 =?UTF-8?q?#1957)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…sts logged error
---
 test_common/gl/setup_x11.cpp                  | 16 ++++---
 test_conformance/api/test_native_kernel.cpp   | 25 +++++------
 test_conformance/buffers/test_sub_buffers.cpp | 11 ++---
 test_conformance/gl/test_buffers.cpp          | 29 +++++-------
 .../relationals/test_shuffles.cpp             | 44 +++++++++----------
 5 files changed, 58 insertions(+), 67 deletions(-)

diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp
index abc065c94c..3292902f6d 100644
--- a/test_common/gl/setup_x11.cpp
+++ b/test_common/gl/setup_x11.cpp
@@ -26,20 +26,26 @@ class X11GLEnvironment : public GLEnvironment
 private:
     cl_device_id m_devices[64];
     cl_uint m_device_count;
+    bool m_glut_init;
 
 public:
     X11GLEnvironment()
     {
         m_device_count = 0;
+        m_glut_init = false;
     }
     virtual int Init( int *argc, char **argv, int use_opencl_32 )
     {
          // Create a GLUT window to render into
-        glutInit( argc, argv );
-        glutInitWindowSize( 512, 512 );
-        glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
-        glutCreateWindow( "OpenCL <-> OpenGL Test" );
-        glewInit();
+         if (!m_glut_init)
+         {
+             glutInit(argc, argv);
+             glutInitWindowSize(512, 512);
+             glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
+             glutCreateWindow("OpenCL <-> OpenGL Test");
+             glewInit();
+             m_glut_init = true;
+         }
         return 0;
     }
 
diff --git a/test_conformance/api/test_native_kernel.cpp b/test_conformance/api/test_native_kernel.cpp
index 50505e226a..d9c93628b4 100644
--- a/test_conformance/api/test_native_kernel.cpp
+++ b/test_conformance/api/test_native_kernel.cpp
@@ -46,12 +46,7 @@ int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue
     }
 
     clMemWrapper streams[ 2 ];
-#if !(defined (_WIN32) && defined (_MSC_VER))
-    cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
-#else
-    cl_int* inBuffer  = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
-    cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
-#endif
+    std::vector<cl_int> inBuffer(n_elems), outBuffer(n_elems);
     clEventWrapper finishEvent;
 
     struct arg_struct
@@ -63,11 +58,12 @@ int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue
 
 
     // Create some input values
-    generate_random_data( kInt, n_elems, seed, inBuffer );
-
+    generate_random_data(kInt, n_elems, seed, inBuffer.data());
 
     // Create I/O streams
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
+    streams[0] =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int),
+                       inBuffer.data(), &error);
     test_error( error, "Unable to create I/O stream" );
     streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
     test_error( error, "Unable to create I/O stream" );
@@ -97,15 +93,18 @@ int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue
     test_error(error, "clWaitForEvents failed");
 
     // Now read the results and verify
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
+    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+                                n_elems * sizeof(cl_int), outBuffer.data(), 0,
+                                NULL, NULL);
     test_error( error, "Unable to read results" );
 
     for( int i = 0; i < n_elems; i++ )
     {
-        if( inBuffer[ i ] != outBuffer[ i ] )
+        if (inBuffer[i] != outBuffer[i])
         {
-            log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
-                      i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
+            log_error("ERROR: Data sample %d for native kernel did not "
+                      "validate (expected %d, got %d)\n",
+                      i, (int)inBuffer[i], (int)outBuffer[i]);
             return 1;
         }
     }
diff --git a/test_conformance/buffers/test_sub_buffers.cpp b/test_conformance/buffers/test_sub_buffers.cpp
index d6ab111e1d..f1f07f84a3 100644
--- a/test_conformance/buffers/test_sub_buffers.cpp
+++ b/test_conformance/buffers/test_sub_buffers.cpp
@@ -16,6 +16,7 @@
 #include "procs.h"
 
 #include <algorithm>
+#include <vector>
 
 // Design:
 // To test sub buffers, we first create one main buffer. We then create several sub-buffers and
@@ -413,16 +414,13 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context
     size_t param_size;
     error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, &param_size );
     test_error( error, "Error obtaining device name" );
+    std::vector<char> device_name(param_size);
 
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    char device_name[param_size];
-#else
-    char* device_name = (char*)_malloca(param_size);
-#endif
     error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
     test_error( error, "Error obtaining device name" );
 
-    log_info( "\tOther device obtained for dual device test is type %s\n", device_name );
+    log_info("\tOther device obtained for dual device test is type %s\n",
+             device_name.data());
 
     // Create a shared context for these two devices
     cl_device_id devices[ 2 ] = { deviceID, otherDevice };
@@ -453,7 +451,6 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context
     test_error( error, "Unable to get secondary device's address alignment" );
 
     cl_uint addressAlign1 = std::max(addressAlign1Bits, addressAlign2Bits) / 8;
-
     // Finally time to run!
     return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
 }
diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp
index c61610d090..73701fb018 100644
--- a/test_conformance/gl/test_buffers.cpp
+++ b/test_conformance/gl/test_buffers.cpp
@@ -126,15 +126,10 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue,
     clProgramWrapper program;
     clKernelWrapper kernel;
     clMemWrapper streams[3];
-    size_t dataSize = numElements * 16 * sizeof(cl_long);
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    cl_long inData[numElements * 16], outDataCL[numElements * 16],
-        outDataGL[numElements * 16];
-#else
-    cl_long *inData = (cl_long *)_malloca(dataSize);
-    cl_long *outDataCL = (cl_long *)_malloca(dataSize);
-    cl_long *outDataGL = (cl_long *)_malloca(dataSize);
-#endif
+    size_t dataSize = numElements * 16;
+    std::vector<cl_long> inData(dataSize), outDataCL(dataSize),
+        outDataGL(dataSize);
+
     glBufferWrapper inGLBuffer, outGLBuffer;
     int i;
     size_t bufferSize;
@@ -168,21 +163,19 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue,
     bufferSize = numElements * vecSize * get_explicit_type_size(vecType);
 
     /* Generate some almost-random input data */
-    gen_input_data(vecType, vecSize * numElements, d, inData);
-    memset(outDataCL, 0, dataSize);
-    memset(outDataGL, 0, dataSize);
+    gen_input_data(vecType, vecSize * numElements, d, inData.data());
 
     /* Generate some GL buffers to go against */
     glGenBuffers(1, &inGLBuffer);
     glGenBuffers(1, &outGLBuffer);
 
     glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer);
-    glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW);
+    glBufferData(GL_ARRAY_BUFFER, bufferSize, inData.data(), GL_STATIC_DRAW);
 
     // Note: we need to bind the output buffer, even though we don't care about
     // its values yet, because CL needs it to get the buffer size
     glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
-    glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW);
+    glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL.data(), GL_STATIC_DRAW);
 
     glBindBuffer(GL_ARRAY_BUFFER, 0);
     glFinish();
@@ -257,16 +250,16 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue,
     // Get the results from both CL and GL and make sure everything looks
     // correct
     error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, bufferSize,
-                                outDataCL, 0, NULL, NULL);
+                                outDataCL.data(), 0, NULL, NULL);
     test_error(error, "Unable to read output CL array!");
 
     glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
     void *glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
-    memcpy(outDataGL, glMem, bufferSize);
+    memcpy(outDataGL.data(), glMem, bufferSize);
     glUnmapBuffer(GL_ARRAY_BUFFER);
 
-    char *inP = (char *)inData, *glP = (char *)outDataGL,
-         *clP = (char *)outDataCL;
+    char *inP = (char *)inData.data(), *glP = (char *)outDataGL.data(),
+         *clP = (char *)outDataCL.data();
     error = 0;
     for (size_t i = 0; i < numElements * vecSize; i++)
     {
diff --git a/test_conformance/relationals/test_shuffles.cpp b/test_conformance/relationals/test_shuffles.cpp
index 223e29e6c2..2fb8ab3be6 100644
--- a/test_conformance/relationals/test_shuffles.cpp
+++ b/test_conformance/relationals/test_shuffles.cpp
@@ -15,7 +15,7 @@
 //
 
 #include <iomanip>
-
+#include <vector>
 #include "testBase.h"
 #include "harness/conversions.h"
 #include "harness/typeWrappers.h"
@@ -618,31 +618,25 @@ int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
     if( error != 0 )
         return error;
 
-    typeSize = get_explicit_type_size( vecType );
-
-#if !(defined(_WIN32) && defined (_MSC_VER))
-    cl_long inData[ inVecSize * numOrders ];
-    cl_long inSecondData[ inVecSize * numOrders ];
-    cl_long outData[ outRealVecSize * numOrders ];
-#else
-    cl_long* inData  = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
-    cl_long* inSecondData  = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
-    cl_long* outData = (cl_long*)_malloca(outRealVecSize * numOrders * sizeof(cl_long));
-#endif
-    memset(outData, 0, outRealVecSize * numOrders * sizeof(cl_long) );
+    typeSize = get_explicit_type_size(vecType);
+    std::vector<cl_long> inData(inVecSize * numOrders);
+    std::vector<cl_long> inSecondData(inVecSize * numOrders);
+    std::vector<cl_long> outData(outRealVecSize * numOrders);
 
-    generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inData );
+    generate_random_data(vecType, (unsigned int)(numOrders * inVecSize), d,
+                         inData.data());
     if( shuffleMode == kBuiltInDualInputFnMode )
-        generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData );
+        generate_random_data(vecType, (unsigned int)(numOrders * inVecSize), d,
+                             inSecondData.data());
 
     streams[0] =
         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       typeSize * inVecSize * numOrders, inData, &error);
+                       typeSize * inVecSize * numOrders, inData.data(), &error);
     test_error( error, "Unable to create input stream" );
 
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       typeSize * outRealVecSize * numOrders, outData, &error);
+    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                                typeSize * outRealVecSize * numOrders,
+                                outData.data(), &error);
     test_error( error, "Unable to create output stream" );
 
     int argIndex = 0;
@@ -650,7 +644,7 @@ int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
     {
         streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                     typeSize * inVecSize * numOrders,
-                                    inSecondData, &error);
+                                    inSecondData.data(), &error);
         test_error( error, "Unable to create second input stream" );
 
         error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] );
@@ -675,12 +669,14 @@ int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
 
 
     // Read the results back
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, typeSize * numOrders * outRealVecSize, outData, 0, NULL, NULL );
+    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+                                typeSize * numOrders * outRealVecSize,
+                                outData.data(), 0, NULL, NULL);
     test_error( error, "Unable to read results" );
 
-    unsigned char *inDataPtr = (unsigned char *)inData;
-    unsigned char *inSecondDataPtr = (unsigned char *)inSecondData;
-    unsigned char *outDataPtr = (unsigned char *)outData;
+    unsigned char *inDataPtr = (unsigned char *)inData.data();
+    unsigned char *inSecondDataPtr = (unsigned char *)inSecondData.data();
+    unsigned char *outDataPtr = (unsigned char *)outData.data();
     int ret = 0;
     int errors_printed = 0;
     for( size_t i = 0; i < numOrders; i++ )

From 07ddc66d8a3bc312a33797a94a1bbf9b96d9ef5f Mon Sep 17 00:00:00 2001
From: Haonan Yang <haonan.yang@intel.com>
Date: Wed, 3 Jul 2024 00:36:18 +0800
Subject: [PATCH 13/18] Fix typo for log. (#1995)

This aligns with unary_float.cpp.
---
 .../math_brute_force/macro_unary_double.cpp            | 10 +++++-----
 test_conformance/math_brute_force/unary_double.cpp     |  5 ++---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index 2d75bc5c33..b747b9802f 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -241,7 +241,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (q[j] > t[j]) err = q[j] - t[j];
             vlog_error("\nERROR: %sD: %" PRId64
                        " ulp error at %.13la: *%" PRId64 " vs. %" PRId64 "\n",
-                       name, err, ((double *)gIn)[j], t[j], q[j]);
+                       name, err, s[j], t[j], q[j]);
             return -1;
         }
 
@@ -265,10 +265,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                 cl_ulong err = -t[j] - q[j];
                 if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error(
-                    "\nERROR: %sD%s: %" PRId64 " ulp error at %.13la: *%" PRId64
-                    " vs. %" PRId64 "\n",
-                    name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]);
+                vlog_error("\nERROR: %sD%s: %" PRId64
+                           " ulp error at %.13la: *%" PRId64 " vs. %" PRId64
+                           "\n",
+                           name, sizeNames[k], err, s[j], -t[j], q[j]);
                 return -1;
             }
         }
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index 5da18f84b0..f3157fdf60 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -288,9 +288,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 {
                     vlog_error("\nERROR: %s%s: %f ulp error at %.13la "
                                "(0x%16.16" PRIx64 "): *%.13la vs. %.13la\n",
-                               job->f->name, sizeNames[k], err,
-                               ((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j],
-                               ((cl_double *)gOut_Ref)[j], test);
+                               job->f->name, sizeNames[k], err, s[j],
+                               ((cl_ulong *)s)[j], ((cl_double *)t)[j], test);
                     return -1;
                 }
             }

From 269e8185f63fc73b6dc17625439c9b85f215a75d Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Tue, 2 Jul 2024 18:37:13 +0200
Subject: [PATCH 14/18] math_brute_force: update README.txt for half testing
 (#1985)

Half precision math functions are fully tested as of #1975.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/math_brute_force/README.txt | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/test_conformance/math_brute_force/README.txt b/test_conformance/math_brute_force/README.txt
index 5b289868f0..3e9e2b6cf7 100644
--- a/test_conformance/math_brute_force/README.txt
+++ b/test_conformance/math_brute_force/README.txt
@@ -38,10 +38,7 @@ values, followed by a few billion random values. If an error is found in a funct
 the test for that function terminates early, reports an error, and moves on to the 
 next test, if any.
 
-The test currently doesn't support half precision math functions covered in section 
-9 of the OpenCL 1.0 specification, but does cover the half_func functions covered in 
-section six. It also doesn't test the native_<funcname> functions, for which any result 
-is conformant.  
+This test doesn't test the native_<funcname> functions, for which any result is conformant.
 
 For the OpenCL 1.0 time frame, the reference library shall be the operating system 
 math library, as modified by the test itself to conform to the OpenCL specification. 

From 2d8028668f6057f2f924549370e78dd233811fcf Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Tue, 2 Jul 2024 18:37:49 +0200
Subject: [PATCH 15/18] [NFC] api: remove duplicate and unused macros (#1949)

The removed macros were never used.

Use the more common `ARRAY_SIZE` macro instead of defining an identical
`NELEMS` macro.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 .../api/test_kernel_arg_multi_setup.cpp          |  2 --
 test_conformance/api/test_queries.cpp            | 14 --------------
 .../api/test_wg_suggested_local_work_size.cpp    | 16 +---------------
 3 files changed, 1 insertion(+), 31 deletions(-)

diff --git a/test_conformance/api/test_kernel_arg_multi_setup.cpp b/test_conformance/api/test_kernel_arg_multi_setup.cpp
index de3dc15e06..79294bd65c 100644
--- a/test_conformance/api/test_kernel_arg_multi_setup.cpp
+++ b/test_conformance/api/test_kernel_arg_multi_setup.cpp
@@ -27,8 +27,6 @@ const char *multi_arg_kernel_source_pattern =
 "    dst3[tid] = src3[tid];\n"
 "}\n";
 
-#define MAX_ERROR_TOLERANCE 0.0005f
-
 int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue,
                        ExplicitType vec1Type, int vec1Size,
                        ExplicitType vec2Type, int vec2Size,
diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp
index f07401077e..a1d8c0218e 100644
--- a/test_conformance/api/test_queries.cpp
+++ b/test_conformance/api/test_queries.cpp
@@ -507,20 +507,6 @@ int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_
     return -1;
 }
 
-#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast )    \
-error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size );        \
-test_error( error, "Unable to get mem object " name );                            \
-if( val != expected )                                                                \
-{                                                                                    \
-log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)val );    \
-return -1;                                                                        \
-}            \
-if( size != sizeof( val ) )                \
-{                                        \
-log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
-return -1;    \
-}
-
 void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
 {
     free( data );
diff --git a/test_conformance/api/test_wg_suggested_local_work_size.cpp b/test_conformance/api/test_wg_suggested_local_work_size.cpp
index 2b2a5404fd..6667ffda9a 100644
--- a/test_conformance/api/test_wg_suggested_local_work_size.cpp
+++ b/test_conformance/api/test_wg_suggested_local_work_size.cpp
@@ -24,19 +24,6 @@
 #include "procs.h"
 #include <CL/cl_ext.h>
 
-/** @brief Gets the number of elements of type s in a fixed length array of s */
-#define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
-#define test_error_ret_and_free(errCode, msg, retValue, ptr)                   \
-    {                                                                          \
-        auto errCodeResult = errCode;                                          \
-        if (errCodeResult != CL_SUCCESS)                                       \
-        {                                                                      \
-            print_error(errCodeResult, msg);                                   \
-            free(ptr);                                                         \
-            return retValue;                                                   \
-        }                                                                      \
-    }
-
 const char* wg_scan_local_work_group_size = R"(
     bool is_zero_linear_id()
     {
@@ -107,7 +94,6 @@ bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); }
 
 bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); }
 
-#define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
 /* The value_range_nD contains numbers to be used for the experiments with 2D
    and 3D global work sizes. This is because we need smaller numbers so that the
    resulting number of work items is meaningful and does not become too large.
@@ -265,7 +251,7 @@ int do_test_work_group_suggested_local_size(
         // return error if no number is found due to the skip condition
         err = -1;
         unsigned int j = 0;
-        size_t num_elems = NELEMS(value_range_nD);
+        size_t num_elems = ARRAY_SIZE(value_range_nD);
         for (size_t i = start; i < end; i += incr)
         {
             if (skip_cond(i)) continue;

From f775377e6a90b13837028f1c006b858dd99aa234 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Tue, 2 Jul 2024 18:38:36 +0200
Subject: [PATCH 16/18] image_streams: fix -Wformat warnings (#1948)

The main sources of warnings were:

 * Printing of a `size_t` which requires the `%zu` specifier.

* Printing of 64-bit values which is now done using the `PRI*64` macros
to ensure portability across 32 and 64-bit builds.

* Calling log_error with a format string of `"%f %f %f %f"` but
specifying only three arguments.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 .../images/kernel_read_write/test_iterations.cpp         | 9 ++++++---
 .../images/kernel_read_write/test_read_1D.cpp            | 5 ++++-
 .../images/kernel_read_write/test_read_1D_array.cpp      | 6 +++++-
 .../images/kernel_read_write/test_read_2D_array.cpp      | 3 +--
 .../images/kernel_read_write/test_write_1D.cpp           | 4 ++--
 .../images/kernel_read_write/test_write_1D_array.cpp     | 4 ++--
 .../images/kernel_read_write/test_write_2D_array.cpp     | 4 ++--
 .../images/kernel_read_write/test_write_3D.cpp           | 4 ++--
 .../images/kernel_read_write/test_write_image.cpp        | 4 ++--
 9 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp
index 96f8933363..d30ac0d4e9 100644
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp
@@ -17,6 +17,7 @@
 #include <float.h>
 
 #include <algorithm>
+#include <cinttypes>
 
 #if defined( __APPLE__ )
     #include <signal.h>
@@ -1481,8 +1482,7 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel ke
         char *imagePtr = (char *)imageValues + nextLevelOffset;
         if( gTestMipmaps )
         {
-            if(gDebugTrace)
-                log_info("\t- Working at mip level %d\n", lod);
+            if (gDebugTrace) log_info("\t- Working at mip level %zu\n", lod);
             error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
         }
 
@@ -1743,7 +1743,10 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
         do
         {
             if( gDebugTrace )
-                log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
+                log_info("   at size %d,%d, starting round ramp at %" PRIu64
+                         " for range %" PRIu64 "\n",
+                         (int)imageInfo.width, (int)imageInfo.height,
+                         gRoundingStartValue, typeRange);
             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp
index 0cbf09891d..cab1fa8e3a 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp
@@ -18,6 +18,7 @@
 #include <float.h>
 
 #include <algorithm>
+#include <cinttypes>
 
 #if defined( __APPLE__ )
     #include <signal.h>
@@ -1151,7 +1152,9 @@ int test_read_image_set_1D(cl_device_id device, cl_context context,
         do
         {
             if( gDebugTrace )
-                log_info( "   at size %d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, gRoundingStartValue, typeRange );
+                log_info("   at size %d, starting round ramp at %" PRIu64
+                         " for range %" PRIu64 "\n",
+                         (int)imageInfo.width, gRoundingStartValue, typeRange);
             int retCode = test_read_image_1D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
index a8009420e8..d55d1b09b7 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
@@ -17,6 +17,7 @@
 #include <float.h>
 
 #include <algorithm>
+#include <cinttypes>
 
 #if defined( __APPLE__ )
 #include <signal.h>
@@ -1261,7 +1262,10 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context,
         do
         {
             if( gDebugTrace )
-                log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.arraySize, gRoundingStartValue, typeRange );
+                log_info("   at size %d,%d, starting round ramp at %" PRIu64
+                         " for range %" PRIu64 "\n",
+                         (int)imageInfo.width, (int)imageInfo.arraySize,
+                         gRoundingStartValue, typeRange);
             int retCode = test_read_image_1D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
index 533a0fe837..72f1238d3d 100644
--- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
@@ -542,8 +542,7 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
         float lod_float = (float)lod;
         if( gTestMipmaps )
         {
-            if(gDebugTrace)
-                log_info(" - Working at mip level %d\n", lod);
+            if (gDebugTrace) log_info(" - Working at mip level %zu\n", lod);
             error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
         }
     for( int q = 0; q < loopCount; q++ )
diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp
index 5f7267967e..8e5c15553b 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp
@@ -472,7 +472,7 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x3F,
                                                   (test_value[0] >> 11) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
@@ -497,7 +497,7 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x1F,
                                                   (test_value[0] >> 10) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
index f90244052b..a6bf4ec25b 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
@@ -493,7 +493,7 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x3F,
                                                   (test_value[0] >> 11) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
@@ -518,7 +518,7 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x1F,
                                                   (test_value[0] >> 10) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
index c1c5699458..40c90e7be9 100644
--- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
@@ -525,7 +525,7 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma
                                                 (test_value[0] >> 5) & 0x3F,
                                                 (test_value[0] >> 11) & 0x1F);
                                             log_error(
-                                                "    Error:    %f %f %f %f\n",
+                                                "    Error:    %f %f %f\n",
                                                 errors[0], errors[1],
                                                 errors[2]);
                                             break;
@@ -554,7 +554,7 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma
                                                 (test_value[0] >> 5) & 0x1F,
                                                 (test_value[0] >> 10) & 0x1F);
                                             log_error(
-                                                "    Error:    %f %f %f %f\n",
+                                                "    Error:    %f %f %f\n",
                                                 errors[0], errors[1],
                                                 errors[2]);
                                             break;
diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp
index 9da93695e3..b50ccb6112 100644
--- a/test_conformance/images/kernel_read_write/test_write_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp
@@ -532,7 +532,7 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que
                                                 (test_value[0] >> 5) & 0x3F,
                                                 (test_value[0] >> 11) & 0x1F);
                                             log_error(
-                                                "    Error:    %f %f %f %f\n",
+                                                "    Error:    %f %f %f\n",
                                                 errors[0], errors[1],
                                                 errors[2]);
                                             break;
@@ -561,7 +561,7 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que
                                                 (test_value[0] >> 5) & 0x1F,
                                                 (test_value[0] >> 10) & 0x1F);
                                             log_error(
-                                                "    Error:    %f %f %f %f\n",
+                                                "    Error:    %f %f %f\n",
                                                 errors[0], errors[1],
                                                 errors[2]);
                                             break;
diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp
index 2962697164..69097e3fa5 100644
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp
@@ -592,7 +592,7 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x3F,
                                                   (test_value[0] >> 11) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
@@ -618,7 +618,7 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x1F,
                                                   (test_value[0] >> 10) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;

From 38ae617ca442d6d7d286e91db0dcf95c64e9c12a Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Tue, 2 Jul 2024 09:39:33 -0700
Subject: [PATCH 17/18] add testing for OpExpectKHR with boolean sources
 (#1904)

Adds a missing test case for OpExpectKHR with boolean sources.
---
 .../spirv_new/spirv_asm/expect_bool.spvasm32  | 111 +++++++++++++++++
 .../spirv_new/spirv_asm/expect_bool.spvasm64  | 113 ++++++++++++++++++
 .../spirv_new/test_cl_khr_expect_assume.cpp   |  29 +++--
 3 files changed, 245 insertions(+), 8 deletions(-)
 create mode 100644 test_conformance/spirv_new/spirv_asm/expect_bool.spvasm32
 create mode 100644 test_conformance/spirv_new/spirv_asm/expect_bool.spvasm64

diff --git a/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm32 b/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm32
new file mode 100644
index 0000000000..600d64afe1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm32
@@ -0,0 +1,111 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos LLVM/SPIR-V Translator; 14
+; Bound: 58
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Vector16
+               OpCapability ExpectAssumeKHR
+               OpExtension "SPV_KHR_expect_assume"
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %expect_bool "expect_bool"
+               OpSource OpenCL_C 102000
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %dst Alignment 64
+       %void = OpTypeVoid
+       %bool = OpTypeBool
+      %bool2 = OpTypeVector %bool 2
+      %bool3 = OpTypeVector %bool 3
+      %bool4 = OpTypeVector %bool 4
+      %bool8 = OpTypeVector %bool 8
+     %bool16 = OpTypeVector %bool 16
+       %uint = OpTypeInt 32 0
+      %uint2 = OpTypeVector %uint 2
+      %uint3 = OpTypeVector %uint 3
+      %uint4 = OpTypeVector %uint 4
+      %uint8 = OpTypeVector %uint 8
+     %uint16 = OpTypeVector %uint 16
+     %uint_0 = OpConstantNull %uint
+    %uint2_0 = OpConstantNull %uint2
+    %uint3_0 = OpConstantNull %uint3
+    %uint4_0 = OpConstantNull %uint4
+    %uint8_0 = OpConstantNull %uint8
+   %uint16_0 = OpConstantNull %uint16
+ %bool_false = OpConstantNull %bool
+%bool2_false = OpConstantNull %bool2
+%bool3_false = OpConstantNull %bool3
+%bool4_false = OpConstantNull %bool4
+%bool8_false = OpConstantNull %bool8
+%bool16_false = OpConstantNull %bool16
+    %index_1 = OpConstant %uint 1
+    %index_2 = OpConstant %uint 2
+    %index_3 = OpConstant %uint 3
+    %index_4 = OpConstant %uint 4
+    %index_5 = OpConstant %uint 5
+%_ptr_CrossWorkgroup_uint16 = OpTypePointer CrossWorkgroup %uint16
+          %6 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint16 %uint
+%expect_bool = OpFunction %void None %6
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_uint16
+      %value = OpFunctionParameter %uint
+         %10 = OpLabel
+                  ; setup
+  %value_vec = OpCompositeInsert %uint2 %value %uint2_0 0
+                  ; scalar expect:
+                  ;     bool test = value == 0
+                  ;     bool t1e = __builtin_expect(test, false);
+                  ;     int v1e = t1e ? 0 : value
+                  ;     dst[0] = (int16)(v1e, 0, ...);
+       %test = OpIEqual %bool %value %uint_0
+        %t1e = OpExpectKHR %bool %test %bool_false
+        %v1e = OpSelect %uint %t1e %uint_0 %value
+      %v1v16 = OpCompositeInsert %uint16 %v1e %uint16_0 0
+               OpStore %dst %v1v16 Aligned 64
+                  ; vec2 expect:
+                  ;     int2 v2 = (int2)(value);
+                  ;     bool2 test2 = v2 == 0
+                  ;     bool2 t2e = __builtin_expect(test2, false2)
+                  ;     int2 v2e = t2e ? : v2;
+                  ;     dst[1] = (int16)(v2e, 0, ...);
+         %v2 = OpVectorShuffle %uint2 %value_vec %value_vec 0 0
+      %test2 = OpIEqual %bool2 %v2 %uint2_0
+        %t2e = OpExpectKHR %bool2 %test2 %bool2_false
+        %v2e = OpSelect %uint2 %t2e %uint2_0 %v2
+      %v2v16 = OpVectorShuffle %uint16 %v2e %uint2_0 0 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
+      %dst_1 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_1
+               OpStore %dst_1 %v2v16 Aligned 64
+                  ; vec3 expect
+         %v3 = OpVectorShuffle %uint3 %value_vec %value_vec 0 0 0
+      %test3 = OpIEqual %bool3 %v3 %uint3_0
+        %t3e = OpExpectKHR %bool3 %test3 %bool3_false
+        %v3e = OpSelect %uint3 %t3e %uint3_0 %v3
+      %v3v16 = OpVectorShuffle %uint16 %v3e %uint2_0 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3
+      %dst_2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_2
+               OpStore %dst_2 %v3v16 Aligned 64
+                  ; vec4 expect
+         %v4 = OpVectorShuffle %uint4 %value_vec %value_vec 0 0 0 0
+      %test4 = OpIEqual %bool4 %v4 %uint4_0
+        %t4e = OpExpectKHR %bool4 %test4 %bool4_false
+        %v4e = OpSelect %uint4 %t4e %uint4_0 %v4
+      %v4v16 = OpVectorShuffle %uint16 %v4e %uint2_0 0 1 2 3 4 4 4 4 4 4 4 4 4 4 4 4
+      %dst_3 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_3
+               OpStore %dst_3 %v4v16 Aligned 64
+                  ; vec8 expect
+         %v8 = OpVectorShuffle %uint8 %value_vec %value_vec 0 0 0 0 0 0 0 0
+      %test8 = OpIEqual %bool8 %v8 %uint8_0
+        %t8e = OpExpectKHR %bool8 %test8 %bool8_false
+        %v8e = OpSelect %uint8 %t8e %uint8_0 %v8
+      %v8v16 = OpVectorShuffle %uint16 %v8e %uint2_0 0 1 2 3 4 5 6 7 8 8 8 8 8 8 8 8
+      %dst_4 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_4
+               OpStore %dst_4 %v8v16 Aligned 64
+                  ; vec16 expect
+        %v16 = OpVectorShuffle %uint16 %value_vec %value_vec 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+     %test16 = OpIEqual %bool16 %v16 %uint16_0
+       %t16e = OpExpectKHR %bool16 %test16 %bool16_false
+       %v16e = OpSelect %uint16 %t16e %uint16_0 %v16
+      %dst_5 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_5
+               OpStore %dst_5 %v16e Aligned 64
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm64 b/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm64
new file mode 100644
index 0000000000..f512a3a98e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm64
@@ -0,0 +1,113 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos LLVM/SPIR-V Translator; 14
+; Bound: 58
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Vector16
+               OpCapability Int64
+               OpCapability ExpectAssumeKHR
+               OpExtension "SPV_KHR_expect_assume"
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %expect_bool "expect_bool"
+               OpSource OpenCL_C 102000
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %dst Alignment 64
+       %void = OpTypeVoid
+       %bool = OpTypeBool
+      %bool2 = OpTypeVector %bool 2
+      %bool3 = OpTypeVector %bool 3
+      %bool4 = OpTypeVector %bool 4
+      %bool8 = OpTypeVector %bool 8
+     %bool16 = OpTypeVector %bool 16
+       %uint = OpTypeInt 32 0
+      %uint2 = OpTypeVector %uint 2
+      %uint3 = OpTypeVector %uint 3
+      %uint4 = OpTypeVector %uint 4
+      %uint8 = OpTypeVector %uint 8
+     %uint16 = OpTypeVector %uint 16
+      %ulong = OpTypeInt 64 0
+     %uint_0 = OpConstantNull %uint
+    %uint2_0 = OpConstantNull %uint2
+    %uint3_0 = OpConstantNull %uint3
+    %uint4_0 = OpConstantNull %uint4
+    %uint8_0 = OpConstantNull %uint8
+   %uint16_0 = OpConstantNull %uint16
+ %bool_false = OpConstantNull %bool
+%bool2_false = OpConstantNull %bool2
+%bool3_false = OpConstantNull %bool3
+%bool4_false = OpConstantNull %bool4
+%bool8_false = OpConstantNull %bool8
+%bool16_false = OpConstantNull %bool16
+    %index_1 = OpConstant %ulong 1
+    %index_2 = OpConstant %ulong 2
+    %index_3 = OpConstant %ulong 3
+    %index_4 = OpConstant %ulong 4
+    %index_5 = OpConstant %ulong 5
+%_ptr_CrossWorkgroup_uint16 = OpTypePointer CrossWorkgroup %uint16
+          %6 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint16 %uint
+%expect_bool = OpFunction %void None %6
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_uint16
+      %value = OpFunctionParameter %uint
+         %10 = OpLabel
+                  ; setup
+  %value_vec = OpCompositeInsert %uint2 %value %uint2_0 0
+                  ; scalar expect:
+                  ;     bool test = value == 0
+                  ;     bool t1e = __builtin_expect(test, false);
+                  ;     int v1e = t1e ? 0 : value
+                  ;     dst[0] = (int16)(v1e, 0, ...);
+       %test = OpIEqual %bool %value %uint_0
+        %t1e = OpExpectKHR %bool %test %bool_false
+        %v1e = OpSelect %uint %t1e %uint_0 %value
+      %v1v16 = OpCompositeInsert %uint16 %v1e %uint16_0 0
+               OpStore %dst %v1v16 Aligned 64
+                  ; vec2 expect:
+                  ;     int2 v2 = (int2)(value);
+                  ;     bool2 test2 = v2 == 0
+                  ;     bool2 t2e = __builtin_expect(test2, false2)
+                  ;     int2 v2e = t2e ? : v2;
+                  ;     dst[1] = (int16)(v2e, 0, ...);
+         %v2 = OpVectorShuffle %uint2 %value_vec %value_vec 0 0
+      %test2 = OpIEqual %bool2 %v2 %uint2_0
+        %t2e = OpExpectKHR %bool2 %test2 %bool2_false
+        %v2e = OpSelect %uint2 %t2e %uint2_0 %v2
+      %v2v16 = OpVectorShuffle %uint16 %v2e %uint2_0 0 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
+      %dst_1 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_1
+               OpStore %dst_1 %v2v16 Aligned 64
+                  ; vec3 expect
+         %v3 = OpVectorShuffle %uint3 %value_vec %value_vec 0 0 0
+      %test3 = OpIEqual %bool3 %v3 %uint3_0
+        %t3e = OpExpectKHR %bool3 %test3 %bool3_false
+        %v3e = OpSelect %uint3 %t3e %uint3_0 %v3
+      %v3v16 = OpVectorShuffle %uint16 %v3e %uint2_0 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3
+      %dst_2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_2
+               OpStore %dst_2 %v3v16 Aligned 64
+                  ; vec4 expect
+         %v4 = OpVectorShuffle %uint4 %value_vec %value_vec 0 0 0 0
+      %test4 = OpIEqual %bool4 %v4 %uint4_0
+        %t4e = OpExpectKHR %bool4 %test4 %bool4_false
+        %v4e = OpSelect %uint4 %t4e %uint4_0 %v4
+      %v4v16 = OpVectorShuffle %uint16 %v4e %uint2_0 0 1 2 3 4 4 4 4 4 4 4 4 4 4 4 4
+      %dst_3 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_3
+               OpStore %dst_3 %v4v16 Aligned 64
+                  ; vec8 expect
+         %v8 = OpVectorShuffle %uint8 %value_vec %value_vec 0 0 0 0 0 0 0 0
+      %test8 = OpIEqual %bool8 %v8 %uint8_0
+        %t8e = OpExpectKHR %bool8 %test8 %bool8_false
+        %v8e = OpSelect %uint8 %t8e %uint8_0 %v8
+      %v8v16 = OpVectorShuffle %uint16 %v8e %uint2_0 0 1 2 3 4 5 6 7 8 8 8 8 8 8 8 8
+      %dst_4 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_4
+               OpStore %dst_4 %v8v16 Aligned 64
+                  ; vec16 expect
+        %v16 = OpVectorShuffle %uint16 %value_vec %value_vec 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+     %test16 = OpIEqual %bool16 %v16 %uint16_0
+       %t16e = OpExpectKHR %bool16 %test16 %bool16_false
+       %v16e = OpSelect %uint16 %t16e %uint16_0 %v16
+      %dst_5 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_5
+               OpStore %dst_5 %v16e Aligned 64
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/test_cl_khr_expect_assume.cpp b/test_conformance/spirv_new/test_cl_khr_expect_assume.cpp
index 05c5068a03..62a3c2baca 100644
--- a/test_conformance/spirv_new/test_cl_khr_expect_assume.cpp
+++ b/test_conformance/spirv_new/test_cl_khr_expect_assume.cpp
@@ -23,36 +23,48 @@ template <typename T> struct TestInfo
 };
 template <> struct TestInfo<cl_char>
 {
+    using argType = cl_char;
     static constexpr const char* typeName = "char";
     static constexpr const char* testName = "expect_char";
 };
 template <> struct TestInfo<cl_short>
 {
+    using argType = cl_short;
     static constexpr const char* typeName = "short";
     static constexpr const char* testName = "expect_short";
 };
 template <> struct TestInfo<cl_int>
 {
+    using argType = cl_int;
     static constexpr const char* typeName = "int";
     static constexpr const char* testName = "expect_int";
 };
 template <> struct TestInfo<cl_long>
 {
+    using argType = cl_long;
     static constexpr const char* typeName = "long";
     static constexpr const char* testName = "expect_long";
 };
+template <> struct TestInfo<cl_bool>
+{
+    using argType = cl_int;
+    static constexpr const char* typeName = "bool";
+    static constexpr const char* testName = "expect_bool";
+};
 
 template <typename T>
 static int test_expect_type(cl_device_id device, cl_context context,
                             cl_command_queue queue)
 {
+    using ArgType = typename TestInfo<T>::argType;
+
     log_info("    testing type %s\n", TestInfo<T>::typeName);
 
-    const T value = 42;
+    const ArgType value = 42;
     cl_int error = CL_SUCCESS;
 
     std::vector<size_t> vecSizes({ 1, 2, 3, 4, 8, 16 });
-    std::vector<T> testData;
+    std::vector<ArgType> testData;
     testData.reserve(16 * vecSizes.size());
 
     for (auto v : vecSizes)
@@ -69,8 +81,8 @@ static int test_expect_type(cl_device_id device, cl_context context,
     }
 
     clMemWrapper dst =
-        clCreateBuffer(context, CL_MEM_WRITE_ONLY, testData.size() * sizeof(T),
-                       nullptr, &error);
+        clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+                       testData.size() * sizeof(ArgType), nullptr, &error);
     test_error(error, "Unable to create destination buffer");
 
     clProgramWrapper prog;
@@ -90,10 +102,10 @@ static int test_expect_type(cl_device_id device, cl_context context,
                                    NULL, NULL);
     test_error(error, "Unable to enqueue kernel");
 
-    std::vector<T> resData(testData.size());
-    error =
-        clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, resData.size() * sizeof(T),
-                            resData.data(), 0, NULL, NULL);
+    std::vector<ArgType> resData(testData.size());
+    error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0,
+                                resData.size() * sizeof(ArgType),
+                                resData.data(), 0, NULL, NULL);
     test_error(error, "Unable to read destination buffer");
 
     if (resData != testData)
@@ -122,6 +134,7 @@ TEST_SPIRV_FUNC(op_expect)
     {
         result |= test_expect_type<cl_long>(deviceID, context, queue);
     }
+    result |= test_expect_type<cl_bool>(deviceID, context, queue);
 
     return result;
 }

From 6b4d57d85c1f1edadb7d3fff978ce9a47efceb91 Mon Sep 17 00:00:00 2001
From: Julia Jiang <56359287+jujiang-del@users.noreply.github.com>
Date: Tue, 2 Jul 2024 12:40:42 -0400
Subject: [PATCH 18/18] Fix cts build error on mariner OS (#1872)

---
 .../extensions/cl_khr_command_buffer/command_buffer_printf.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp
index eef3e3558b..80fac2ada1 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp
@@ -426,7 +426,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
                 std::max(min_pattern_length, rand() % max_pattern_length);
 
             std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
-            pattern[pattern_length] = '\0';
+            pattern.back() = '\0';
             simul_passes[i] = { pattern,
                                 { cl_int(i * offset), cl_int(pattern_length) },
                                 std::vector<cl_char>(num_elements