From 21ee05ecafde275886a2fd57499cb4000b446dd7 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 13 Aug 2024 18:18:33 +0200 Subject: [PATCH 1/4] math_brute_force: stop relying on volatile for IsTininessDetectedBeforeRounding (#2038) This makes it literally impossible for drivers to constant fold the IsTininessDetectedBeforeRounding kernel. Sure, drivers might have should respect volatile here, but I'm not convinced this is actually required by the spec in a very strict sense, because here there are no side-effects possible in the first place. And as far as I know, constant folding is allowed to give different results than an actual GPU calculation would. In any case, passing the constants via kernel arguments makes this detection more reliable and one doesn't have to wonder why the fma test is failing. Side note: this was the last bug (known as of today) I had to fix in order being able to make a CL CTS submission for Apple Silicon devices. --- test_conformance/math_brute_force/main.cpp | 23 +++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index d939984e55..e1ea4c2544 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1043,13 +1043,14 @@ int IsTininessDetectedBeforeRounding(void) { int error; const char *kernelSource = - R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out ) + R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out, float a, float b ) { - volatile float a = 0x1.000002p-126f; - volatile float b = 0x1.fffffcp-1f; out[0] = a * b; // product is 0x1.fffffffffff8p-127 })"; + float a = 0x1.000002p-126f; + float b = 0x1.fffffcp-1f; + clProgramWrapper query; clKernelWrapper kernel; error = @@ -1073,6 +1074,22 @@ int IsTininessDetectedBeforeRounding(void) return error; } + if ((error = clSetKernelArg(kernel, 1, sizeof(a), &a))) + { + vlog_error("Error: Unable to set kernel arg to detect how tininess is " + "detected for the device. Err = %d", + error); + return error; + } + + if ((error = clSetKernelArg(kernel, 2, sizeof(b), &b))) + { + vlog_error("Error: Unable to set kernel arg to detect how tininess is " + "detected for the device. Err = %d", + error); + return error; + } + size_t dim = 1; if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0, NULL, NULL))) From aa49f3bb53fdd3f8f4c23705bcc5ef5f3724616b Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 13 Aug 2024 18:50:07 +0200 Subject: [PATCH 2/4] Added few missing format cases for zero and blank space padding (#2016) according to work plan for issue #1058 --- test_conformance/printf/util_printf.cpp | 32 +++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp index de5db5430e..03d5eb171c 100644 --- a/test_conformance/printf/util_printf.cpp +++ b/test_conformance/printf/util_printf.cpp @@ -80,8 +80,12 @@ std::vector printIntGenParameters = { //(Minimum)Six-wide,Five-digit(zero-filled in absent // digits),default(right)-justified - { { "%06.5i" }, "100" } + { { "%06.5i" }, "100" }, + //(Minimum)Ten-wide, left-justified, with a blank space inserted before the + // value + + { { "% 10d" }, "42" }, }; //----------------------------------------------- @@ -165,6 +169,16 @@ std::vector printHalfGenParameters = { // exponent,left-justified,with sign,capital E,default(right)-justified { { "%+#21.15E" }, "-65504.0h" }, + + //(Minimum)Ten-wide,two positions after the decimal,with + // a blank space inserted before the value, default(right)-justified + + { { "% 10.2f" }, "1.25h" }, + + //(Minimum)Eight-wide,two positions after the decimal, with + // zeros inserted before the value, default(right)-justified + + { { "%08.2f" }, "3.14h" }, }; //--------------------------------------------------------- @@ -364,6 +378,16 @@ std::vector printFloatGenParameters = { // xh.hhhhpAd style,default(right)-justified { { "%10.2a" }, "9990.235" }, + + //(Minimum)Ten-wide,two positions after the decimal,with + // a blank space inserted before the value, default(right)-justified + + { { "% 10.2f" }, "1.25" }, + + //(Minimum)Eight-wide,two positions after the decimal,with + // zeros inserted before the value, default(right)-justified + + { { "%08.2f" }, "3.14" }, }; //--------------------------------------------------------- @@ -492,8 +516,12 @@ std::vector printOctalGenParameters = { //(Minimum)Four-wide,Five-digit,0-flag ignored(because of // precision),default(right)-justified - { { "%04.5o" }, "10" } + { { "%04.5o" }, "10" }, + + //(Minimum)Ten-wide, zeros inserted before the value, + // default(right)-justified + { { "%010o" }, "10" } }; //------------------------------------------------------- From b4c3bf2af2ec171e28952de65e3c48df3cf02589 Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:52:07 +0100 Subject: [PATCH 3/4] Fixes for basic explicit_s2v and commonfns degrees for cl_half (#2024) Basic explicit_s2v: The verification step was always using round to even when converting a float to half even for round to zero cores. Commonfns degrees: The verification step was only taking into account infinities and not values that over/underflow. This resulted in an incorrect error calculation. E.g: double cpu_result = 175668.85998711039; cl_half gpu_result = 31743; // this is 65504 when converting to float, we overflowed. float error = (cpu_result - gpu_result) * some_factor; The fix adds the check if( (cl_half) reference == test ) before calculating the error. --- test_common/harness/conversions.cpp | 34 ++++++----- test_common/harness/conversions.h | 3 + test_conformance/basic/test_explicit_s2v.cpp | 60 ++++++++++++------- .../basic/test_vector_creation.cpp | 5 +- test_conformance/commonfns/test_base.h | 49 ++++++++------- 5 files changed, 90 insertions(+), 61 deletions(-) diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index 300d7d4648..18c2869d93 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -263,10 +263,11 @@ static Long sLowerLimits[kNumExplicitTypes] = { } \ break; -#define TO_HALF_CASE(inType) \ +#define TO_HALF_CASE(inType, halfRoundingMode) \ case kHalf: \ halfPtr = (cl_half *)outRaw; \ - *halfPtr = cl_half_from_float((float)(*inType##Ptr), CL_HALF_RTE); \ + *halfPtr = \ + cl_half_from_float((float)(*inType##Ptr), halfRoundingMode); \ break; #define TO_FLOAT_CASE(inType) \ case kFloat: \ @@ -453,6 +454,7 @@ typedef unsigned long ulong; void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, + cl_half_rounding_mode halfRoundingMode, ExplicitType outType) { bool *boolPtr; @@ -537,7 +539,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(schar, kULong, ULong) SIMPLE_CAST_CASE(schar, kUnsignedLong, ULong) - TO_HALF_CASE(schar) + TO_HALF_CASE(schar, halfRoundingMode) TO_FLOAT_CASE(schar) TO_DOUBLE_CASE(schar) @@ -570,7 +572,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(uchar, kULong, ULong) SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong) - TO_HALF_CASE(uchar) + TO_HALF_CASE(uchar, halfRoundingMode) TO_FLOAT_CASE(uchar) TO_DOUBLE_CASE(uchar) @@ -603,7 +605,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(uchar, kULong, ULong) SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong) - TO_HALF_CASE(uchar) + TO_HALF_CASE(uchar, halfRoundingMode) TO_FLOAT_CASE(uchar) TO_DOUBLE_CASE(uchar) @@ -636,7 +638,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(short, kULong, ULong) SIMPLE_CAST_CASE(short, kUnsignedLong, ULong) - TO_HALF_CASE(short) + TO_HALF_CASE(short, halfRoundingMode) TO_FLOAT_CASE(short) TO_DOUBLE_CASE(short) @@ -669,7 +671,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(ushort, kULong, ULong) SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong) - TO_HALF_CASE(ushort) + TO_HALF_CASE(ushort, halfRoundingMode) TO_FLOAT_CASE(ushort) TO_DOUBLE_CASE(ushort) @@ -702,7 +704,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(ushort, kULong, ULong) SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong) - TO_HALF_CASE(ushort) + TO_HALF_CASE(ushort, halfRoundingMode) TO_FLOAT_CASE(ushort) TO_DOUBLE_CASE(ushort) @@ -735,7 +737,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(int, kULong, ULong) SIMPLE_CAST_CASE(int, kUnsignedLong, ULong) - TO_HALF_CASE(int) + TO_HALF_CASE(int, halfRoundingMode) TO_FLOAT_CASE(int) TO_DOUBLE_CASE(int) @@ -768,7 +770,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(uint, kULong, ULong) SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong) - TO_HALF_CASE(uint) + TO_HALF_CASE(uint, halfRoundingMode) TO_FLOAT_CASE(uint) TO_DOUBLE_CASE(uint) @@ -801,7 +803,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, SIMPLE_CAST_CASE(uint, kULong, ULong) SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong) - TO_HALF_CASE(uint) + TO_HALF_CASE(uint, halfRoundingMode) TO_FLOAT_CASE(uint) TO_DOUBLE_CASE(uint) @@ -834,7 +836,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, DOWN_CAST_CASE(Long, kULong, ULong, saturate) DOWN_CAST_CASE(Long, kUnsignedLong, ULong, saturate) - TO_HALF_CASE(Long) + TO_HALF_CASE(Long, halfRoundingMode) TO_FLOAT_CASE(Long) TO_DOUBLE_CASE(Long) @@ -867,7 +869,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate) U_DOWN_CAST_CASE(ULong, kLong, Long, saturate) - TO_HALF_CASE(ULong) + TO_HALF_CASE(ULong, halfRoundingMode) TO_FLOAT_CASE(ULong) TO_DOUBLE_CASE(ULong) @@ -900,7 +902,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate) U_DOWN_CAST_CASE(ULong, kLong, Long, saturate) - TO_HALF_CASE(ULong) + TO_HALF_CASE(ULong, halfRoundingMode) TO_FLOAT_CASE(ULong) TO_DOUBLE_CASE(ULong) @@ -969,7 +971,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, FLOAT_ROUND_CASE(kULong, ULong, roundType, saturate) FLOAT_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate) - TO_HALF_CASE(float) + TO_HALF_CASE(float, halfRoundingMode) case kFloat: memcpy(outRaw, inRaw, get_explicit_type_size(inType)); @@ -1003,7 +1005,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, DOUBLE_ROUND_CASE(kULong, ULong, roundType, saturate) DOUBLE_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate) - TO_HALF_CASE(double) + TO_HALF_CASE(double, halfRoundingMode) TO_FLOAT_CASE(double); diff --git a/test_common/harness/conversions.h b/test_common/harness/conversions.h index e6880e0552..49108f1ac5 100644 --- a/test_common/harness/conversions.h +++ b/test_common/harness/conversions.h @@ -25,6 +25,8 @@ #include #include +#include + /* Note: the next three all have to match in size and order!! */ enum ExplicitTypes @@ -71,6 +73,7 @@ extern const char *get_explicit_type_name(ExplicitType type); extern void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, + cl_half_rounding_mode halfRoundingMode, ExplicitType outType); extern void generate_random_data(ExplicitType type, size_t count, MTdata d, diff --git a/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/basic/test_explicit_s2v.cpp index 067afb435b..a061f9bbba 100644 --- a/test_conformance/basic/test_explicit_s2v.cpp +++ b/test_conformance/basic/test_explicit_s2v.cpp @@ -24,10 +24,14 @@ using std::isnan; #include #include +#include + #include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" +extern cl_half_rounding_mode halfRoundingMode; + namespace { // clang-format off @@ -123,53 +127,60 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue, unsigned char *inPtr, *outPtr; size_t paramSize, destTypeSize; - paramSize = get_explicit_type_size( srcType ); - destTypeSize = get_explicit_type_size( destType ); + paramSize = get_explicit_type_size(srcType); + destTypeSize = get_explicit_type_size(destType); size_t destStride = destTypeSize * vecSize; std::vector outData(destStride * count); streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, paramSize * count, inputData, &error); - test_error( error, "clCreateBuffer failed"); + test_error(error, "clCreateBuffer failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, destStride * count, NULL, &error); - test_error( error, "clCreateBuffer failed"); + test_error(error, "clCreateBuffer failed"); /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); - test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); /* Run the kernel */ threadSize[0] = count; - error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] ); - test_error( error, "Unable to get work group size to use" ); + error = get_max_common_work_group_size(context, kernel, threadSize[0], + &groupSize[0]); + test_error(error, "Unable to get work group size to use"); - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadSize, + groupSize, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); - /* Now verify the results. Each value should have been duplicated four times, and we should be able to just + /* Now verify the results. Each value should have been duplicated four + times, and we should be able to just do a memcpy instead of relying on the actual type of data */ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, destStride * count, outData.data(), 0, NULL, NULL); - test_error( error, "Unable to read output values!" ); + test_error(error, "Unable to read output values!"); inPtr = (unsigned char *)inputData; outPtr = (unsigned char *)outData.data(); - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - /* Convert the input data element to our output data type to compare against */ - convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType ); + /* Convert the input data element to our output data type to compare + * against */ + convert_explicit_value((void *)inPtr, (void *)convertedData, srcType, + false, kDefaultRoundingType, halfRoundingMode, + destType); /* Now compare every element of the vector */ - for( s = 0; s < vecSize; s++ ) + for (s = 0; s < vecSize; s++) { - if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 ) + if (memcmp(convertedData, outPtr + destTypeSize * s, destTypeSize) + != 0) { bool isSrcNaN = (((srcType == kHalf) @@ -194,9 +205,14 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue, } unsigned int *p = (unsigned int *)outPtr; - log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize ); - log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) ); - log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + log_error("ERROR: Output value %d:%d does not validate for " + "size %d:%d!\n", + i, s, vecSize, (int)destTypeSize); + log_error(" Input: 0x%0*x\n", (int)(paramSize * 2), + *(unsigned int *)inPtr + & (0xffffffff >> (32 - paramSize * 8))); + log_error(" Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[0], + p[1], p[2], p[3]); return -1; } } diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp index 6bae156acf..79c97f7dd9 100644 --- a/test_conformance/basic/test_vector_creation.cpp +++ b/test_conformance/basic/test_vector_creation.cpp @@ -21,6 +21,8 @@ #include +extern cl_half_rounding_mode halfRoundingMode; + #define DEBUG 0 #define DEPTH 16 // Limit the maximum code size for any given kernel. @@ -320,7 +322,8 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, &j, ((char *)input_data_converted.data()) + get_explicit_type_size(vecType[type_index]) * j, - kInt, 0, kRoundToEven, vecType[type_index]); + kInt, 0, kRoundToEven, halfRoundingMode, + vecType[type_index]); } } diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index be36ed264b..c7286a92f5 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -151,28 +151,6 @@ struct MixTest : BaseFunctionTest cl_int Run() override; }; -template float UlpFn(const T &val, const double &r) -{ - if (std::is_same::value) - { - return Ulp_Error_Half(val, r); - } - else if (std::is_same::value) - { - return Ulp_Error(val, r); - } - else if (std::is_same::value) - { - return Ulp_Error_Double(val, r); - } - else - { - log_error("UlpFn: unsupported data type\n"); - } - - return -1.f; // wrong val -} - template inline double conv_to_dbl(const T &val) { if (std::is_same::value) @@ -217,6 +195,33 @@ template bool isfinite_fp(const T &v) } } +template float UlpFn(const T &val, const double &r) +{ + if (std::is_same::value) + { + if (conv_to_half(r) == val) + { + return 0.0f; + } + + return Ulp_Error_Half(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error_Double(val, r); + } + else + { + log_error("UlpFn: unsupported data type\n"); + } + + return -1.f; // wrong val +} + template int MakeAndRunTest(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, From b8981f5fb8be9da3bf802949622499232568faf6 Mon Sep 17 00:00:00 2001 From: joshqti <127994991+joshqti@users.noreply.github.com> Date: Tue, 13 Aug 2024 09:54:09 -0700 Subject: [PATCH 4/4] [basic] Enable image format aware scanline comparison. (#2042) Use format aware memory comparison instead of memcmp. --- test_conformance/basic/test_imagearraycopy.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp index f29eac31c3..0ba2ea5234 100644 --- a/test_conformance/basic/test_imagearraycopy.cpp +++ b/test_conformance/basic/test_imagearraycopy.cpp @@ -91,7 +91,13 @@ int test_imagearraycopy_single_format(cl_device_id device, cl_context context, err = clReleaseEvent(copyevent); test_error(err, "clReleaseEvent failed"); - if (memcmp(imgptr, bufptr, buffer_size) != 0) + image_descriptor compareImageInfo = { 0 }; + compareImageInfo.format = format; + compareImageInfo.width = buffer_size / get_pixel_size(format); + size_t where = compare_scanlines(&compareImageInfo, + reinterpret_cast(imgptr), + reinterpret_cast(bufptr)); + if (where < compareImageInfo.width) { log_error("ERROR: Results did not validate!\n"); auto inchar = static_cast(imgptr);