From b4c3bf2af2ec171e28952de65e3c48df3cf02589 Mon Sep 17 00:00:00 2001
From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com>
Date: Tue, 13 Aug 2024 17:52:07 +0100
Subject: [PATCH] Fixes for basic explicit_s2v and commonfns degrees for
 cl_half (#2024)

Basic explicit_s2v:

The verification step was always using round to even when converting a
float to half even for round to zero cores.

Commonfns degrees:

The verification step was only taking into account infinities and not
values that over/underflow. This resulted in an incorrect error
calculation. E.g:

double cpu_result = 175668.85998711039;
cl_half gpu_result = 31743; // this is 65504 when converting to float,
we overflowed.
float error = (cpu_result - gpu_result) * some_factor;

The fix adds the check if( (cl_half) reference == test ) before
calculating the error.
---
 test_common/harness/conversions.cpp           | 34 ++++++-----
 test_common/harness/conversions.h             |  3 +
 test_conformance/basic/test_explicit_s2v.cpp  | 60 ++++++++++++-------
 .../basic/test_vector_creation.cpp            |  5 +-
 test_conformance/commonfns/test_base.h        | 49 ++++++++-------
 5 files changed, 90 insertions(+), 61 deletions(-)

diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp
index 300d7d4648..18c2869d93 100644
--- a/test_common/harness/conversions.cpp
+++ b/test_common/harness/conversions.cpp
@@ -263,10 +263,11 @@ static Long sLowerLimits[kNumExplicitTypes] = {
         }                                                                      \
         break;
 
-#define TO_HALF_CASE(inType)                                                   \
+#define TO_HALF_CASE(inType, halfRoundingMode)                                 \
     case kHalf:                                                                \
         halfPtr = (cl_half *)outRaw;                                           \
-        *halfPtr = cl_half_from_float((float)(*inType##Ptr), CL_HALF_RTE);     \
+        *halfPtr =                                                             \
+            cl_half_from_float((float)(*inType##Ptr), halfRoundingMode);       \
         break;
 #define TO_FLOAT_CASE(inType)                                                  \
     case kFloat:                                                               \
@@ -453,6 +454,7 @@ typedef unsigned long ulong;
 
 void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                             bool saturate, RoundingType roundType,
+                            cl_half_rounding_mode halfRoundingMode,
                             ExplicitType outType)
 {
     bool *boolPtr;
@@ -537,7 +539,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(schar, kULong, ULong)
                     SIMPLE_CAST_CASE(schar, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(schar)
+                    TO_HALF_CASE(schar, halfRoundingMode)
                     TO_FLOAT_CASE(schar)
                     TO_DOUBLE_CASE(schar)
 
@@ -570,7 +572,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(uchar, kULong, ULong)
                     SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(uchar)
+                    TO_HALF_CASE(uchar, halfRoundingMode)
                     TO_FLOAT_CASE(uchar)
                     TO_DOUBLE_CASE(uchar)
 
@@ -603,7 +605,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(uchar, kULong, ULong)
                     SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(uchar)
+                    TO_HALF_CASE(uchar, halfRoundingMode)
                     TO_FLOAT_CASE(uchar)
                     TO_DOUBLE_CASE(uchar)
 
@@ -636,7 +638,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(short, kULong, ULong)
                     SIMPLE_CAST_CASE(short, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(short)
+                    TO_HALF_CASE(short, halfRoundingMode)
                     TO_FLOAT_CASE(short)
                     TO_DOUBLE_CASE(short)
 
@@ -669,7 +671,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(ushort, kULong, ULong)
                     SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(ushort)
+                    TO_HALF_CASE(ushort, halfRoundingMode)
                     TO_FLOAT_CASE(ushort)
                     TO_DOUBLE_CASE(ushort)
 
@@ -702,7 +704,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(ushort, kULong, ULong)
                     SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(ushort)
+                    TO_HALF_CASE(ushort, halfRoundingMode)
                     TO_FLOAT_CASE(ushort)
                     TO_DOUBLE_CASE(ushort)
 
@@ -735,7 +737,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(int, kULong, ULong)
                     SIMPLE_CAST_CASE(int, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(int)
+                    TO_HALF_CASE(int, halfRoundingMode)
                     TO_FLOAT_CASE(int)
                     TO_DOUBLE_CASE(int)
 
@@ -768,7 +770,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(uint, kULong, ULong)
                     SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(uint)
+                    TO_HALF_CASE(uint, halfRoundingMode)
                     TO_FLOAT_CASE(uint)
                     TO_DOUBLE_CASE(uint)
 
@@ -801,7 +803,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     SIMPLE_CAST_CASE(uint, kULong, ULong)
                     SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)
 
-                    TO_HALF_CASE(uint)
+                    TO_HALF_CASE(uint, halfRoundingMode)
                     TO_FLOAT_CASE(uint)
                     TO_DOUBLE_CASE(uint)
 
@@ -834,7 +836,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     DOWN_CAST_CASE(Long, kULong, ULong, saturate)
                     DOWN_CAST_CASE(Long, kUnsignedLong, ULong, saturate)
 
-                    TO_HALF_CASE(Long)
+                    TO_HALF_CASE(Long, halfRoundingMode)
                     TO_FLOAT_CASE(Long)
                     TO_DOUBLE_CASE(Long)
 
@@ -867,7 +869,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
                     U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)
 
-                    TO_HALF_CASE(ULong)
+                    TO_HALF_CASE(ULong, halfRoundingMode)
                     TO_FLOAT_CASE(ULong)
                     TO_DOUBLE_CASE(ULong)
 
@@ -900,7 +902,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                     U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
                     U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)
 
-                    TO_HALF_CASE(ULong)
+                    TO_HALF_CASE(ULong, halfRoundingMode)
                     TO_FLOAT_CASE(ULong)
                     TO_DOUBLE_CASE(ULong)
 
@@ -969,7 +971,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                 FLOAT_ROUND_CASE(kULong, ULong, roundType, saturate)
                 FLOAT_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)
 
-                TO_HALF_CASE(float)
+                TO_HALF_CASE(float, halfRoundingMode)
 
                 case kFloat:
                     memcpy(outRaw, inRaw, get_explicit_type_size(inType));
@@ -1003,7 +1005,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
                 DOUBLE_ROUND_CASE(kULong, ULong, roundType, saturate)
                 DOUBLE_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)
 
-                TO_HALF_CASE(double)
+                TO_HALF_CASE(double, halfRoundingMode)
 
                 TO_FLOAT_CASE(double);
 
diff --git a/test_common/harness/conversions.h b/test_common/harness/conversions.h
index e6880e0552..49108f1ac5 100644
--- a/test_common/harness/conversions.h
+++ b/test_common/harness/conversions.h
@@ -25,6 +25,8 @@
 #include <string.h>
 #include <sys/types.h>
 
+#include <CL/cl_half.h>
+
 /* Note: the next three all have to match in size and order!! */
 
 enum ExplicitTypes
@@ -71,6 +73,7 @@ extern const char *get_explicit_type_name(ExplicitType type);
 extern void convert_explicit_value(void *inRaw, void *outRaw,
                                    ExplicitType inType, bool saturate,
                                    RoundingType roundType,
+                                   cl_half_rounding_mode halfRoundingMode,
                                    ExplicitType outType);
 
 extern void generate_random_data(ExplicitType type, size_t count, MTdata d,
diff --git a/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/basic/test_explicit_s2v.cpp
index 067afb435b..a061f9bbba 100644
--- a/test_conformance/basic/test_explicit_s2v.cpp
+++ b/test_conformance/basic/test_explicit_s2v.cpp
@@ -24,10 +24,14 @@ using std::isnan;
 #include <sys/stat.h>
 #include <vector>
 
+#include <CL/cl_half.h>
+
 #include "procs.h"
 #include "harness/conversions.h"
 #include "harness/typeWrappers.h"
 
+extern cl_half_rounding_mode halfRoundingMode;
+
 namespace {
 
 // clang-format off
@@ -123,53 +127,60 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue,
     unsigned char *inPtr, *outPtr;
     size_t paramSize, destTypeSize;
 
-    paramSize = get_explicit_type_size( srcType );
-    destTypeSize = get_explicit_type_size( destType );
+    paramSize = get_explicit_type_size(srcType);
+    destTypeSize = get_explicit_type_size(destType);
 
     size_t destStride = destTypeSize * vecSize;
     std::vector<char> outData(destStride * count);
 
     streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                 paramSize * count, inputData, &error);
-    test_error( error, "clCreateBuffer failed");
+    test_error(error, "clCreateBuffer failed");
     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, destStride * count,
                                 NULL, &error);
-    test_error( error, "clCreateBuffer failed");
+    test_error(error, "clCreateBuffer failed");
 
     /* Set the arguments */
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+    test_error(error, "Unable to set indexed kernel arguments");
 
     /* Run the kernel */
     threadSize[0] = count;
 
-    error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
-    test_error( error, "Unable to get work group size to use" );
+    error = get_max_common_work_group_size(context, kernel, threadSize[0],
+                                           &groupSize[0]);
+    test_error(error, "Unable to get work group size to use");
 
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadSize,
+                                   groupSize, 0, NULL, NULL);
+    test_error(error, "Unable to execute test kernel");
 
-    /* Now verify the results. Each value should have been duplicated four times, and we should be able to just
+    /* Now verify the results. Each value should have been duplicated four
+     times, and we should be able to just
      do a memcpy instead of relying on the actual type of data */
     error =
         clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, destStride * count,
                             outData.data(), 0, NULL, NULL);
-    test_error( error, "Unable to read output values!" );
+    test_error(error, "Unable to read output values!");
 
     inPtr = (unsigned char *)inputData;
     outPtr = (unsigned char *)outData.data();
 
-    for( i = 0; i < count; i++ )
+    for (i = 0; i < count; i++)
     {
-        /* Convert the input data element to our output data type to compare against */
-        convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
+        /* Convert the input data element to our output data type to compare
+         * against */
+        convert_explicit_value((void *)inPtr, (void *)convertedData, srcType,
+                               false, kDefaultRoundingType, halfRoundingMode,
+                               destType);
 
         /* Now compare every element of the vector */
-        for( s = 0; s < vecSize; s++ )
+        for (s = 0; s < vecSize; s++)
         {
-            if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
+            if (memcmp(convertedData, outPtr + destTypeSize * s, destTypeSize)
+                != 0)
             {
                 bool isSrcNaN =
                     (((srcType == kHalf)
@@ -194,9 +205,14 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue,
                 }
 
                 unsigned int *p = (unsigned int *)outPtr;
-                log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
-                log_error( "       Input:   0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) );
-                log_error( "       Actual:  0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
+                log_error("ERROR: Output value %d:%d does not validate for "
+                          "size %d:%d!\n",
+                          i, s, vecSize, (int)destTypeSize);
+                log_error("       Input:   0x%0*x\n", (int)(paramSize * 2),
+                          *(unsigned int *)inPtr
+                              & (0xffffffff >> (32 - paramSize * 8)));
+                log_error("       Actual:  0x%08x 0x%08x 0x%08x 0x%08x\n", p[0],
+                          p[1], p[2], p[3]);
                 return -1;
             }
         }
diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp
index 6bae156acf..79c97f7dd9 100644
--- a/test_conformance/basic/test_vector_creation.cpp
+++ b/test_conformance/basic/test_vector_creation.cpp
@@ -21,6 +21,8 @@
 
 #include <CL/cl_half.h>
 
+extern cl_half_rounding_mode halfRoundingMode;
+
 #define DEBUG 0
 #define DEPTH 16
 // Limit the maximum code size for any given kernel.
@@ -320,7 +322,8 @@ int test_vector_creation(cl_device_id deviceID, cl_context context,
                     &j,
                     ((char *)input_data_converted.data())
                         + get_explicit_type_size(vecType[type_index]) * j,
-                    kInt, 0, kRoundToEven, vecType[type_index]);
+                    kInt, 0, kRoundToEven, halfRoundingMode,
+                    vecType[type_index]);
             }
         }
 
diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h
index be36ed264b..c7286a92f5 100644
--- a/test_conformance/commonfns/test_base.h
+++ b/test_conformance/commonfns/test_base.h
@@ -151,28 +151,6 @@ struct MixTest : BaseFunctionTest
     cl_int Run() override;
 };
 
-template <typename T> float UlpFn(const T &val, const double &r)
-{
-    if (std::is_same<T, half>::value)
-    {
-        return Ulp_Error_Half(val, r);
-    }
-    else if (std::is_same<T, float>::value)
-    {
-        return Ulp_Error(val, r);
-    }
-    else if (std::is_same<T, double>::value)
-    {
-        return Ulp_Error_Double(val, r);
-    }
-    else
-    {
-        log_error("UlpFn: unsupported data type\n");
-    }
-
-    return -1.f; // wrong val
-}
-
 template <typename T> inline double conv_to_dbl(const T &val)
 {
     if (std::is_same<T, half>::value)
@@ -217,6 +195,33 @@ template <typename T> bool isfinite_fp(const T &v)
     }
 }
 
+template <typename T> float UlpFn(const T &val, const double &r)
+{
+    if (std::is_same<T, half>::value)
+    {
+        if (conv_to_half(r) == val)
+        {
+            return 0.0f;
+        }
+
+        return Ulp_Error_Half(val, r);
+    }
+    else if (std::is_same<T, float>::value)
+    {
+        return Ulp_Error(val, r);
+    }
+    else if (std::is_same<T, double>::value)
+    {
+        return Ulp_Error_Double(val, r);
+    }
+    else
+    {
+        log_error("UlpFn: unsupported data type\n");
+    }
+
+    return -1.f; // wrong val
+}
+
 template <class T>
 int MakeAndRunTest(cl_device_id device, cl_context context,
                    cl_command_queue queue, int num_elements,