From b4c3bf2af2ec171e28952de65e3c48df3cf02589 Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:52:07 +0100 Subject: [PATCH] Fixes for basic explicit_s2v and commonfns degrees for cl_half (#2024) Basic explicit_s2v: The verification step was always using round to even when converting a float to half even for round to zero cores. Commonfns degrees: The verification step was only taking into account infinities and not values that over/underflow. This resulted in an incorrect error calculation. E.g: double cpu_result = 175668.85998711039; cl_half gpu_result = 31743; // this is 65504 when converting to float, we overflowed. float error = (cpu_result - gpu_result) * some_factor; The fix adds the check if( (cl_half) reference == test ) before calculating the error. --- test_common/harness/conversions.cpp | 34 ++++++----- test_common/harness/conversions.h | 3 + test_conformance/basic/test_explicit_s2v.cpp | 60 ++++++++++++------- .../basic/test_vector_creation.cpp | 5 +- test_conformance/commonfns/test_base.h | 49 ++++++++------- 5 files changed, 90 insertions(+), 61 deletions(-) diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index 300d7d4648..18c2869d93 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -263,10 +263,11 @@ static Long sLowerLimits[kNumExplicitTypes] = { } \ break; -#define TO_HALF_CASE(inType) \ +#define TO_HALF_CASE(inType, halfRoundingMode) \ case kHalf: \ halfPtr = (cl_half *)outRaw; \ - *halfPtr = cl_half_from_float((float)(*inType##Ptr), CL_HALF_RTE); \ + *halfPtr = \ + cl_half_from_float((float)(*inType##Ptr), halfRoundingMode); \ break; #define TO_FLOAT_CASE(inType) \ case kFloat: \ @@ -453,6 +454,7 @@ typedef unsigned long ulong; void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, + cl_half_rounding_mode halfRoundingMode, ExplicitType outType) { bool *boolPtr; @@ -537,7 +539,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(schar, kULong, ULong) SIMPLE_CAST_CASE(schar, kUnsignedLong, ULong) - TO_HALF_CASE(schar) + TO_HALF_CASE(schar, halfRoundingMode) TO_FLOAT_CASE(schar) TO_DOUBLE_CASE(schar) @@ -570,7 +572,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(uchar, kULong, ULong) SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong) - TO_HALF_CASE(uchar) + TO_HALF_CASE(uchar, halfRoundingMode) TO_FLOAT_CASE(uchar) TO_DOUBLE_CASE(uchar) @@ -603,7 +605,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(uchar, kULong, ULong) SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong) - TO_HALF_CASE(uchar) + TO_HALF_CASE(uchar, halfRoundingMode) TO_FLOAT_CASE(uchar) TO_DOUBLE_CASE(uchar) @@ -636,7 +638,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(short, kULong, ULong) SIMPLE_CAST_CASE(short, kUnsignedLong, ULong) - TO_HALF_CASE(short) + TO_HALF_CASE(short, halfRoundingMode) TO_FLOAT_CASE(short) TO_DOUBLE_CASE(short) @@ -669,7 +671,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(ushort, kULong, ULong) SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong) - TO_HALF_CASE(ushort) + TO_HALF_CASE(ushort, halfRoundingMode) TO_FLOAT_CASE(ushort) TO_DOUBLE_CASE(ushort) @@ -702,7 +704,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(ushort, kULong, ULong) SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong) - TO_HALF_CASE(ushort) + TO_HALF_CASE(ushort, halfRoundingMode) TO_FLOAT_CASE(ushort) TO_DOUBLE_CASE(ushort) @@ -735,7 +737,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(int, kULong, ULong) SIMPLE_CAST_CASE(int, kUnsignedLong, ULong) - TO_HALF_CASE(int) + TO_HALF_CASE(int, halfRoundingMode) TO_FLOAT_CASE(int) TO_DOUBLE_CASE(int) @@ -768,7 +770,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(uint, kULong, ULong) SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong) - TO_HALF_CASE(uint) + TO_HALF_CASE(uint, halfRoundingMode) TO_FLOAT_CASE(uint) TO_DOUBLE_CASE(uint) @@ -801,7 +803,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(uint, kULong, ULong) SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong) - TO_HALF_CASE(uint) + TO_HALF_CASE(uint, halfRoundingMode) TO_FLOAT_CASE(uint) TO_DOUBLE_CASE(uint) @@ -834,7 +836,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, DOWN_CAST_CASE(Long, kULong, ULong, saturate) DOWN_CAST_CASE(Long, kUnsignedLong, ULong, saturate) - TO_HALF_CASE(Long) + TO_HALF_CASE(Long, halfRoundingMode) TO_FLOAT_CASE(Long) TO_DOUBLE_CASE(Long) @@ -867,7 +869,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate) U_DOWN_CAST_CASE(ULong, kLong, Long, saturate) - TO_HALF_CASE(ULong) + TO_HALF_CASE(ULong, halfRoundingMode) TO_FLOAT_CASE(ULong) TO_DOUBLE_CASE(ULong) @@ -900,7 +902,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate) U_DOWN_CAST_CASE(ULong, kLong, Long, saturate) - TO_HALF_CASE(ULong) + TO_HALF_CASE(ULong, halfRoundingMode) TO_FLOAT_CASE(ULong) TO_DOUBLE_CASE(ULong) @@ -969,7 +971,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, FLOAT_ROUND_CASE(kULong, ULong, roundType, saturate) FLOAT_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate) - TO_HALF_CASE(float) + TO_HALF_CASE(float, halfRoundingMode) case kFloat: memcpy(outRaw, inRaw, get_explicit_type_size(inType)); @@ -1003,7 +1005,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, DOUBLE_ROUND_CASE(kULong, ULong, roundType, saturate) DOUBLE_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate) - TO_HALF_CASE(double) + TO_HALF_CASE(double, halfRoundingMode) TO_FLOAT_CASE(double); diff --git a/test_common/harness/conversions.h b/test_common/harness/conversions.h index e6880e0552..49108f1ac5 100644 --- a/test_common/harness/conversions.h +++ b/test_common/harness/conversions.h @@ -25,6 +25,8 @@ #include #include +#include + /* Note: the next three all have to match in size and order!! */ enum ExplicitTypes @@ -71,6 +73,7 @@ extern const char *get_explicit_type_name(ExplicitType type); extern void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, + cl_half_rounding_mode halfRoundingMode, ExplicitType outType); extern void generate_random_data(ExplicitType type, size_t count, MTdata d, diff --git a/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/basic/test_explicit_s2v.cpp index 067afb435b..a061f9bbba 100644 --- a/test_conformance/basic/test_explicit_s2v.cpp +++ b/test_conformance/basic/test_explicit_s2v.cpp @@ -24,10 +24,14 @@ using std::isnan; #include #include +#include + #include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" +extern cl_half_rounding_mode halfRoundingMode; + namespace { // clang-format off @@ -123,53 +127,60 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue, unsigned char *inPtr, *outPtr; size_t paramSize, destTypeSize; - paramSize = get_explicit_type_size( srcType ); - destTypeSize = get_explicit_type_size( destType ); + paramSize = get_explicit_type_size(srcType); + destTypeSize = get_explicit_type_size(destType); size_t destStride = destTypeSize * vecSize; std::vector outData(destStride * count); streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, paramSize * count, inputData, &error); - test_error( error, "clCreateBuffer failed"); + test_error(error, "clCreateBuffer failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, destStride * count, NULL, &error); - test_error( error, "clCreateBuffer failed"); + test_error(error, "clCreateBuffer failed"); /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); - test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); /* Run the kernel */ threadSize[0] = count; - error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] ); - test_error( error, "Unable to get work group size to use" ); + error = get_max_common_work_group_size(context, kernel, threadSize[0], + &groupSize[0]); + test_error(error, "Unable to get work group size to use"); - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadSize, + groupSize, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); - /* Now verify the results. Each value should have been duplicated four times, and we should be able to just + /* Now verify the results. Each value should have been duplicated four + times, and we should be able to just do a memcpy instead of relying on the actual type of data */ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, destStride * count, outData.data(), 0, NULL, NULL); - test_error( error, "Unable to read output values!" ); + test_error(error, "Unable to read output values!"); inPtr = (unsigned char *)inputData; outPtr = (unsigned char *)outData.data(); - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - /* Convert the input data element to our output data type to compare against */ - convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType ); + /* Convert the input data element to our output data type to compare + * against */ + convert_explicit_value((void *)inPtr, (void *)convertedData, srcType, + false, kDefaultRoundingType, halfRoundingMode, + destType); /* Now compare every element of the vector */ - for( s = 0; s < vecSize; s++ ) + for (s = 0; s < vecSize; s++) { - if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 ) + if (memcmp(convertedData, outPtr + destTypeSize * s, destTypeSize) + != 0) { bool isSrcNaN = (((srcType == kHalf) @@ -194,9 +205,14 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue, } unsigned int *p = (unsigned int *)outPtr; - log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize ); - log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) ); - log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + log_error("ERROR: Output value %d:%d does not validate for " + "size %d:%d!\n", + i, s, vecSize, (int)destTypeSize); + log_error(" Input: 0x%0*x\n", (int)(paramSize * 2), + *(unsigned int *)inPtr + & (0xffffffff >> (32 - paramSize * 8))); + log_error(" Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], + p[1], p[2], p[3]); return -1; } } diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp index 6bae156acf..79c97f7dd9 100644 --- a/test_conformance/basic/test_vector_creation.cpp +++ b/test_conformance/basic/test_vector_creation.cpp @@ -21,6 +21,8 @@ #include +extern cl_half_rounding_mode halfRoundingMode; + #define DEBUG 0 #define DEPTH 16 // Limit the maximum code size for any given kernel. @@ -320,7 +322,8 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, &j, ((char *)input_data_converted.data()) + get_explicit_type_size(vecType[type_index]) * j, - kInt, 0, kRoundToEven, vecType[type_index]); + kInt, 0, kRoundToEven, halfRoundingMode, + vecType[type_index]); } } diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index be36ed264b..c7286a92f5 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -151,28 +151,6 @@ struct MixTest : BaseFunctionTest cl_int Run() override; }; -template float UlpFn(const T &val, const double &r) -{ - if (std::is_same::value) - { - return Ulp_Error_Half(val, r); - } - else if (std::is_same::value) - { - return Ulp_Error(val, r); - } - else if (std::is_same::value) - { - return Ulp_Error_Double(val, r); - } - else - { - log_error("UlpFn: unsupported data type\n"); - } - - return -1.f; // wrong val -} - template inline double conv_to_dbl(const T &val) { if (std::is_same::value) @@ -217,6 +195,33 @@ template bool isfinite_fp(const T &v) } } +template float UlpFn(const T &val, const double &r) +{ + if (std::is_same::value) + { + if (conv_to_half(r) == val) + { + return 0.0f; + } + + return Ulp_Error_Half(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error_Double(val, r); + } + else + { + log_error("UlpFn: unsupported data type\n"); + } + + return -1.f; // wrong val +} + template int MakeAndRunTest(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements,