From 21ee05ecafde275886a2fd57499cb4000b446dd7 Mon Sep 17 00:00:00 2001
From: Karol Herbst <kherbst@redhat.com>
Date: Tue, 13 Aug 2024 18:18:33 +0200
Subject: [PATCH 1/4] math_brute_force: stop relying on volatile for
 IsTininessDetectedBeforeRounding (#2038)

This makes it literally impossible for drivers to constant fold the
IsTininessDetectedBeforeRounding kernel. Sure, drivers might have should
respect volatile here, but I'm not convinced this is actually required
by the spec in a very strict sense, because here there are no
side-effects possible in the first place.

And as far as I know, constant folding is allowed to give different
results than an actual GPU calculation would.

In any case, passing the constants via kernel arguments makes this
detection more reliable and one doesn't have to wonder why the fma test
is failing.

Side note: this was the last bug (known as of today) I had to fix in
order being able to make a CL CTS submission for Apple Silicon devices.
---
 test_conformance/math_brute_force/main.cpp | 23 +++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index d939984e55..e1ea4c2544 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -1043,13 +1043,14 @@ int IsTininessDetectedBeforeRounding(void)
 {
     int error;
     const char *kernelSource =
-        R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
+        R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out, float a, float b )
         {
-           volatile float a = 0x1.000002p-126f;
-           volatile float b = 0x1.fffffcp-1f;
            out[0] = a * b; // product is 0x1.fffffffffff8p-127
         })";
 
+    float a = 0x1.000002p-126f;
+    float b = 0x1.fffffcp-1f;
+
     clProgramWrapper query;
     clKernelWrapper kernel;
     error =
@@ -1073,6 +1074,22 @@ int IsTininessDetectedBeforeRounding(void)
         return error;
     }
 
+    if ((error = clSetKernelArg(kernel, 1, sizeof(a), &a)))
+    {
+        vlog_error("Error: Unable to set kernel arg to detect how tininess is "
+                   "detected  for the device. Err = %d",
+                   error);
+        return error;
+    }
+
+    if ((error = clSetKernelArg(kernel, 2, sizeof(b), &b)))
+    {
+        vlog_error("Error: Unable to set kernel arg to detect how tininess is "
+                   "detected  for the device. Err = %d",
+                   error);
+        return error;
+    }
+
     size_t dim = 1;
     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
                                         NULL, NULL)))

From aa49f3bb53fdd3f8f4c23705bcc5ef5f3724616b Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 13 Aug 2024 18:50:07 +0200
Subject: [PATCH 2/4] Added few missing format cases for zero and blank space
 padding (#2016)

according to work plan for issue #1058
---
 test_conformance/printf/util_printf.cpp | 32 +++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index de5db5430e..03d5eb171c 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -80,8 +80,12 @@ std::vector<printDataGenParameters> printIntGenParameters = {
     //(Minimum)Six-wide,Five-digit(zero-filled in absent
     // digits),default(right)-justified
 
-    { { "%06.5i" }, "100" }
+    { { "%06.5i" }, "100" },
 
+    //(Minimum)Ten-wide, left-justified, with a blank space inserted before the
+    // value
+
+    { { "% 10d" }, "42" },
 };
 
 //-----------------------------------------------
@@ -165,6 +169,16 @@ std::vector<printDataGenParameters> printHalfGenParameters = {
     // exponent,left-justified,with sign,capital E,default(right)-justified
 
     { { "%+#21.15E" }, "-65504.0h" },
+
+    //(Minimum)Ten-wide,two positions after the decimal,with
+    // a blank space inserted before the value, default(right)-justified
+
+    { { "% 10.2f" }, "1.25h" },
+
+    //(Minimum)Eight-wide,two positions after the decimal, with
+    // zeros inserted before the value, default(right)-justified
+
+    { { "%08.2f" }, "3.14h" },
 };
 
 //---------------------------------------------------------
@@ -364,6 +378,16 @@ std::vector<printDataGenParameters> printFloatGenParameters = {
     // xh.hhhhpAd style,default(right)-justified
 
     { { "%10.2a" }, "9990.235" },
+
+    //(Minimum)Ten-wide,two positions after the decimal,with
+    // a blank space inserted before the value, default(right)-justified
+
+    { { "% 10.2f" }, "1.25" },
+
+    //(Minimum)Eight-wide,two positions after the decimal,with
+    // zeros inserted before the value, default(right)-justified
+
+    { { "%08.2f" }, "3.14" },
 };
 
 //---------------------------------------------------------
@@ -492,8 +516,12 @@ std::vector<printDataGenParameters> printOctalGenParameters = {
     //(Minimum)Four-wide,Five-digit,0-flag ignored(because of
     // precision),default(right)-justified
 
-    { { "%04.5o" }, "10" }
+    { { "%04.5o" }, "10" },
+
+    //(Minimum)Ten-wide, zeros inserted before the value,
+    // default(right)-justified
 
+    { { "%010o" }, "10" }
 };
 
 //-------------------------------------------------------

From b4c3bf2af2ec171e28952de65e3c48df3cf02589 Mon Sep 17 00:00:00 2001
From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com>
Date: Tue, 13 Aug 2024 17:52:07 +0100
Subject: [PATCH 3/4] Fixes for basic explicit_s2v and commonfns degrees for
 cl_half (#2024)

Basic explicit_s2v:

The verification step was always using round to even when converting a
float to half even for round to zero cores.

Commonfns degrees:

The verification step was only taking into account infinities and not
values that over/underflow. This resulted in an incorrect error
calculation. E.g:

double cpu_result = 175668.85998711039;
cl_half gpu_result = 31743; // this is 65504 when converting to float,
we overflowed.
float error = (cpu_result - gpu_result) * some_factor;

The fix adds the check if( (cl_half) reference == test ) before
calculating the error.
---
 test_common/harness/conversions.cpp           | 34 ++++++-----
 test_common/harness/conversions.h             |  3 +
 test_conformance/basic/test_explicit_s2v.cpp  | 60 ++++++++++++-------
 .../basic/test_vector_creation.cpp            |  5 +-
 test_conformance/commonfns/test_base.h        | 49 ++++++++-------
 5 files changed, 90 insertions(+), 61 deletions(-)

diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp
index 300d7d4648..18c2869d93 100644
--- a/test_common/harness/conversions.cpp
+++ b/test_common/harness/conversions.cpp
@@ -263,10 +263,11 @@ static Long sLowerLimits[kNumExplicitTypes] = {
         }                                                                      \
         break;
 
-#define TO_HALF_CASE(inType)                                                   \
+#define TO_HALF_CASE(inType, halfRoundingMode)                                 \
     case kHalf:                                                                \
         halfPtr = (cl_half *)outRaw;                                           \
-        *halfPtr = cl_half_from_float((float)(*inType##Ptr), CL_HALF_RTE);     \
+        *halfPtr =                                                             \
+            cl_half_from_float((float)(*inType##Ptr), halfRoundingMode);       \
         break;
 #define TO_FLOAT_CASE(inType)                                                  \
     case kFloat:                                                               \
@@ -453,6 +454,7 @@ typedef unsigned long ulong;
 
 void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                             bool saturate, RoundingType roundType,
+                            cl_half_rounding_mode halfRoundingMode,
                             ExplicitType outType)
 {
     bool *boolPtr;
@@ -537,7 +539,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(schar, kULong, ULong)
                     SIMPLE_CAST_CASE(schar, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(schar)
+                    TO_HALF_CASE(schar, halfRoundingMode)
                     TO_FLOAT_CASE(schar)
                     TO_DOUBLE_CASE(schar)
 
@@ -570,7 +572,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(uchar, kULong, ULong)
                     SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(uchar)
+                    TO_HALF_CASE(uchar, halfRoundingMode)
                     TO_FLOAT_CASE(uchar)
                     TO_DOUBLE_CASE(uchar)
 
@@ -603,7 +605,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(uchar, kULong, ULong)
                     SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(uchar)
+                    TO_HALF_CASE(uchar, halfRoundingMode)
                     TO_FLOAT_CASE(uchar)
                     TO_DOUBLE_CASE(uchar)
 
@@ -636,7 +638,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(short, kULong, ULong)
                     SIMPLE_CAST_CASE(short, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(short)
+                    TO_HALF_CASE(short, halfRoundingMode)
                     TO_FLOAT_CASE(short)
                     TO_DOUBLE_CASE(short)
 
@@ -669,7 +671,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(ushort, kULong, ULong)
                     SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(ushort)
+                    TO_HALF_CASE(ushort, halfRoundingMode)
                     TO_FLOAT_CASE(ushort)
                     TO_DOUBLE_CASE(ushort)
 
@@ -702,7 +704,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(ushort, kULong, ULong)
                     SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(ushort)
+                    TO_HALF_CASE(ushort, halfRoundingMode)
                     TO_FLOAT_CASE(ushort)
                     TO_DOUBLE_CASE(ushort)
 
@@ -735,7 +737,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(int, kULong, ULong)
                     SIMPLE_CAST_CASE(int, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(int)
+                    TO_HALF_CASE(int, halfRoundingMode)
                     TO_FLOAT_CASE(int)
                     TO_DOUBLE_CASE(int)
 
@@ -768,7 +770,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(uint, kULong, ULong)
                     SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(uint)
+                    TO_HALF_CASE(uint, halfRoundingMode)
                     TO_FLOAT_CASE(uint)
                     TO_DOUBLE_CASE(uint)
 
@@ -801,7 +803,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(uint, kULong, ULong)
                     SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(uint)
+                    TO_HALF_CASE(uint, halfRoundingMode)
                     TO_FLOAT_CASE(uint)
                     TO_DOUBLE_CASE(uint)
 
@@ -834,7 +836,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     DOWN_CAST_CASE(Long, kULong, ULong, saturate)
                     DOWN_CAST_CASE(Long, kUnsignedLong, ULong, saturate)
 
-                    TO_HALF_CASE(Long)
+                    TO_HALF_CASE(Long, halfRoundingMode)
                     TO_FLOAT_CASE(Long)
                     TO_DOUBLE_CASE(Long)
 
@@ -867,7 +869,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
                     U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)
 
-                    TO_HALF_CASE(ULong)
+                    TO_HALF_CASE(ULong, halfRoundingMode)
                     TO_FLOAT_CASE(ULong)
                     TO_DOUBLE_CASE(ULong)
 
@@ -900,7 +902,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
                     U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)
 
-                    TO_HALF_CASE(ULong)
+                    TO_HALF_CASE(ULong, halfRoundingMode)
                     TO_FLOAT_CASE(ULong)
                     TO_DOUBLE_CASE(ULong)
 
@@ -969,7 +971,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                 FLOAT_ROUND_CASE(kULong, ULong, roundType, saturate)
                 FLOAT_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)
 
-                TO_HALF_CASE(float)
+                TO_HALF_CASE(float, halfRoundingMode)
 
                 case kFloat:
                     memcpy(outRaw, inRaw, get_explicit_type_size(inType));
@@ -1003,7 +1005,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                 DOUBLE_ROUND_CASE(kULong, ULong, roundType, saturate)
                 DOUBLE_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)
 
-                TO_HALF_CASE(double)
+                TO_HALF_CASE(double, halfRoundingMode)
 
                 TO_FLOAT_CASE(double);
 
diff --git a/test_common/harness/conversions.h b/test_common/harness/conversions.h
index e6880e0552..49108f1ac5 100644
--- a/test_common/harness/conversions.h
+++ b/test_common/harness/conversions.h
@@ -25,6 +25,8 @@
 #include <string.h>
 #include <sys/types.h>
 
+#include <CL/cl_half.h>
+
 /* Note: the next three all have to match in size and order!! */
 
 enum ExplicitTypes
@@ -71,6 +73,7 @@ extern const char *get_explicit_type_name(ExplicitType type);
 extern void convert_explicit_value(void *inRaw, void *outRaw,
                                    ExplicitType inType, bool saturate,
                                    RoundingType roundType,
+                                   cl_half_rounding_mode halfRoundingMode,
                                    ExplicitType outType);
 
 extern void generate_random_data(ExplicitType type, size_t count, MTdata d,
diff --git a/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/basic/test_explicit_s2v.cpp
index 067afb435b..a061f9bbba 100644
--- a/test_conformance/basic/test_explicit_s2v.cpp
+++ b/test_conformance/basic/test_explicit_s2v.cpp
@@ -24,10 +24,14 @@ using std::isnan;
 #include <sys/stat.h>
 #include <vector>
 
+#include <CL/cl_half.h>
+
 #include "procs.h"
 #include "harness/conversions.h"
 #include "harness/typeWrappers.h"
 
+extern cl_half_rounding_mode halfRoundingMode;
+
 namespace {
 
 // clang-format off
@@ -123,53 +127,60 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue,
     unsigned char *inPtr, *outPtr;
     size_t paramSize, destTypeSize;
 
-    paramSize = get_explicit_type_size( srcType );
-    destTypeSize = get_explicit_type_size( destType );
+    paramSize = get_explicit_type_size(srcType);
+    destTypeSize = get_explicit_type_size(destType);
 
     size_t destStride = destTypeSize * vecSize;
     std::vector<char> outData(destStride * count);
 
     streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                 paramSize * count, inputData, &error);
-    test_error( error, "clCreateBuffer failed");
+    test_error(error, "clCreateBuffer failed");
     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, destStride * count,
                                 NULL, &error);
-    test_error( error, "clCreateBuffer failed");
+    test_error(error, "clCreateBuffer failed");
 
     /* Set the arguments */
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+    test_error(error, "Unable to set indexed kernel arguments");
 
     /* Run the kernel */
     threadSize[0] = count;
 
-    error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
-    test_error( error, "Unable to get work group size to use" );
+    error = get_max_common_work_group_size(context, kernel, threadSize[0],
+                                           &groupSize[0]);
+    test_error(error, "Unable to get work group size to use");
 
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadSize,
+                                   groupSize, 0, NULL, NULL);
+    test_error(error, "Unable to execute test kernel");
 
-    /* Now verify the results. Each value should have been duplicated four times, and we should be able to just
+    /* Now verify the results. Each value should have been duplicated four
+     times, and we should be able to just
      do a memcpy instead of relying on the actual type of data */
     error =
         clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, destStride * count,
                             outData.data(), 0, NULL, NULL);
-    test_error( error, "Unable to read output values!" );
+    test_error(error, "Unable to read output values!");
 
     inPtr = (unsigned char *)inputData;
     outPtr = (unsigned char *)outData.data();
 
-    for( i = 0; i < count; i++ )
+    for (i = 0; i < count; i++)
     {
-        /* Convert the input data element to our output data type to compare against */
-        convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
+        /* Convert the input data element to our output data type to compare
+         * against */
+        convert_explicit_value((void *)inPtr, (void *)convertedData, srcType,
+                               false, kDefaultRoundingType, halfRoundingMode,
+                               destType);
 
         /* Now compare every element of the vector */
-        for( s = 0; s < vecSize; s++ )
+        for (s = 0; s < vecSize; s++)
         {
-            if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
+            if (memcmp(convertedData, outPtr + destTypeSize * s, destTypeSize)
+                != 0)
             {
                 bool isSrcNaN =
                     (((srcType == kHalf)
@@ -194,9 +205,14 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue,
                 }
 
                 unsigned int *p = (unsigned int *)outPtr;
-                log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
-                log_error( "       Input:   0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) );
-                log_error( "       Actual:  0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                log_error("ERROR: Output value %d:%d does not validate for "
+                          "size %d:%d!\n",
+                          i, s, vecSize, (int)destTypeSize);
+                log_error("       Input:   0x%0*x\n", (int)(paramSize * 2),
+                          *(unsigned int *)inPtr
+                              & (0xffffffff >> (32 - paramSize * 8)));
+                log_error("       Actual:  0x%08x 0x%08x 0x%08x 0x%08x\n", p[0],
+                          p[1], p[2], p[3]);
                 return -1;
             }
         }
diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp
index 6bae156acf..79c97f7dd9 100644
--- a/test_conformance/basic/test_vector_creation.cpp
+++ b/test_conformance/basic/test_vector_creation.cpp
@@ -21,6 +21,8 @@
 
 #include <CL/cl_half.h>
 
+extern cl_half_rounding_mode halfRoundingMode;
+
 #define DEBUG 0
 #define DEPTH 16
 // Limit the maximum code size for any given kernel.
@@ -320,7 +322,8 @@ int test_vector_creation(cl_device_id deviceID, cl_context context,
                     &j,
                     ((char *)input_data_converted.data())
                         + get_explicit_type_size(vecType[type_index]) * j,
-                    kInt, 0, kRoundToEven, vecType[type_index]);
+                    kInt, 0, kRoundToEven, halfRoundingMode,
+                    vecType[type_index]);
             }
         }
 
diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h
index be36ed264b..c7286a92f5 100644
--- a/test_conformance/commonfns/test_base.h
+++ b/test_conformance/commonfns/test_base.h
@@ -151,28 +151,6 @@ struct MixTest : BaseFunctionTest
     cl_int Run() override;
 };
 
-template <typename T> float UlpFn(const T &val, const double &r)
-{
-    if (std::is_same<T, half>::value)
-    {
-        return Ulp_Error_Half(val, r);
-    }
-    else if (std::is_same<T, float>::value)
-    {
-        return Ulp_Error(val, r);
-    }
-    else if (std::is_same<T, double>::value)
-    {
-        return Ulp_Error_Double(val, r);
-    }
-    else
-    {
-        log_error("UlpFn: unsupported data type\n");
-    }
-
-    return -1.f; // wrong val
-}
-
 template <typename T> inline double conv_to_dbl(const T &val)
 {
     if (std::is_same<T, half>::value)
@@ -217,6 +195,33 @@ template <typename T> bool isfinite_fp(const T &v)
     }
 }
 
+template <typename T> float UlpFn(const T &val, const double &r)
+{
+    if (std::is_same<T, half>::value)
+    {
+        if (conv_to_half(r) == val)
+        {
+            return 0.0f;
+        }
+
+        return Ulp_Error_Half(val, r);
+    }
+    else if (std::is_same<T, float>::value)
+    {
+        return Ulp_Error(val, r);
+    }
+    else if (std::is_same<T, double>::value)
+    {
+        return Ulp_Error_Double(val, r);
+    }
+    else
+    {
+        log_error("UlpFn: unsupported data type\n");
+    }
+
+    return -1.f; // wrong val
+}
+
 template <class T>
 int MakeAndRunTest(cl_device_id device, cl_context context,
                    cl_command_queue queue, int num_elements,

From b8981f5fb8be9da3bf802949622499232568faf6 Mon Sep 17 00:00:00 2001
From: joshqti <127994991+joshqti@users.noreply.github.com>
Date: Tue, 13 Aug 2024 09:54:09 -0700
Subject: [PATCH 4/4] [basic] Enable image format aware scanline comparison.
 (#2042)

Use format aware memory comparison instead of memcmp.
---
 test_conformance/basic/test_imagearraycopy.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp
index f29eac31c3..0ba2ea5234 100644
--- a/test_conformance/basic/test_imagearraycopy.cpp
+++ b/test_conformance/basic/test_imagearraycopy.cpp
@@ -91,7 +91,13 @@ int test_imagearraycopy_single_format(cl_device_id device, cl_context context,
     err = clReleaseEvent(copyevent);
     test_error(err, "clReleaseEvent failed");
 
-    if (memcmp(imgptr, bufptr, buffer_size) != 0)
+    image_descriptor compareImageInfo = { 0 };
+    compareImageInfo.format = format;
+    compareImageInfo.width = buffer_size / get_pixel_size(format);
+    size_t where = compare_scanlines(&compareImageInfo,
+                                     reinterpret_cast<const char *>(imgptr),
+                                     reinterpret_cast<const char *>(bufptr));
+    if (where < compareImageInfo.width)
     {
         log_error("ERROR: Results did not validate!\n");
         auto inchar = static_cast<unsigned char *>(imgptr);