Skip to content

Commit

Permalink
Fixes for basic explicit_s2v and commonfns degrees for cl_half (#2024)
Browse files Browse the repository at this point in the history
Basic explicit_s2v:

The verification step was always using round to even when converting a
float to half even for round to zero cores.

Commonfns degrees:

The verification step was only taking into account infinities and not
values that over/underflow. This resulted in an incorrect error
calculation. E.g:

double cpu_result = 175668.85998711039;
cl_half gpu_result = 31743; // this is 65504 when converting to float,
we overflowed.
float error = (cpu_result - gpu_result) * some_factor;

The fix adds the check if( (cl_half) reference == test ) before
calculating the error.
  • Loading branch information
AhmedAmraniAkdi committed Aug 13, 2024
1 parent aa49f3b commit b4c3bf2
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 61 deletions.
34 changes: 18 additions & 16 deletions test_common/harness/conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,11 @@ static Long sLowerLimits[kNumExplicitTypes] = {
} \
break;

#define TO_HALF_CASE(inType) \
#define TO_HALF_CASE(inType, halfRoundingMode) \
case kHalf: \
halfPtr = (cl_half *)outRaw; \
*halfPtr = cl_half_from_float((float)(*inType##Ptr), CL_HALF_RTE); \
*halfPtr = \
cl_half_from_float((float)(*inType##Ptr), halfRoundingMode); \
break;
#define TO_FLOAT_CASE(inType) \
case kFloat: \
Expand Down Expand Up @@ -453,6 +454,7 @@ typedef unsigned long ulong;

void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
bool saturate, RoundingType roundType,
cl_half_rounding_mode halfRoundingMode,
ExplicitType outType)
{
bool *boolPtr;
Expand Down Expand Up @@ -537,7 +539,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(schar, kULong, ULong)
SIMPLE_CAST_CASE(schar, kUnsignedLong, ULong)

TO_HALF_CASE(schar)
TO_HALF_CASE(schar, halfRoundingMode)
TO_FLOAT_CASE(schar)
TO_DOUBLE_CASE(schar)

Expand Down Expand Up @@ -570,7 +572,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(uchar, kULong, ULong)
SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)

TO_HALF_CASE(uchar)
TO_HALF_CASE(uchar, halfRoundingMode)
TO_FLOAT_CASE(uchar)
TO_DOUBLE_CASE(uchar)

Expand Down Expand Up @@ -603,7 +605,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(uchar, kULong, ULong)
SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)

TO_HALF_CASE(uchar)
TO_HALF_CASE(uchar, halfRoundingMode)
TO_FLOAT_CASE(uchar)
TO_DOUBLE_CASE(uchar)

Expand Down Expand Up @@ -636,7 +638,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(short, kULong, ULong)
SIMPLE_CAST_CASE(short, kUnsignedLong, ULong)

TO_HALF_CASE(short)
TO_HALF_CASE(short, halfRoundingMode)
TO_FLOAT_CASE(short)
TO_DOUBLE_CASE(short)

Expand Down Expand Up @@ -669,7 +671,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(ushort, kULong, ULong)
SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)

TO_HALF_CASE(ushort)
TO_HALF_CASE(ushort, halfRoundingMode)
TO_FLOAT_CASE(ushort)
TO_DOUBLE_CASE(ushort)

Expand Down Expand Up @@ -702,7 +704,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(ushort, kULong, ULong)
SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)

TO_HALF_CASE(ushort)
TO_HALF_CASE(ushort, halfRoundingMode)
TO_FLOAT_CASE(ushort)
TO_DOUBLE_CASE(ushort)

Expand Down Expand Up @@ -735,7 +737,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(int, kULong, ULong)
SIMPLE_CAST_CASE(int, kUnsignedLong, ULong)

TO_HALF_CASE(int)
TO_HALF_CASE(int, halfRoundingMode)
TO_FLOAT_CASE(int)
TO_DOUBLE_CASE(int)

Expand Down Expand Up @@ -768,7 +770,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(uint, kULong, ULong)
SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)

TO_HALF_CASE(uint)
TO_HALF_CASE(uint, halfRoundingMode)
TO_FLOAT_CASE(uint)
TO_DOUBLE_CASE(uint)

Expand Down Expand Up @@ -801,7 +803,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
SIMPLE_CAST_CASE(uint, kULong, ULong)
SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)

TO_HALF_CASE(uint)
TO_HALF_CASE(uint, halfRoundingMode)
TO_FLOAT_CASE(uint)
TO_DOUBLE_CASE(uint)

Expand Down Expand Up @@ -834,7 +836,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
DOWN_CAST_CASE(Long, kULong, ULong, saturate)
DOWN_CAST_CASE(Long, kUnsignedLong, ULong, saturate)

TO_HALF_CASE(Long)
TO_HALF_CASE(Long, halfRoundingMode)
TO_FLOAT_CASE(Long)
TO_DOUBLE_CASE(Long)

Expand Down Expand Up @@ -867,7 +869,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)

TO_HALF_CASE(ULong)
TO_HALF_CASE(ULong, halfRoundingMode)
TO_FLOAT_CASE(ULong)
TO_DOUBLE_CASE(ULong)

Expand Down Expand Up @@ -900,7 +902,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)

TO_HALF_CASE(ULong)
TO_HALF_CASE(ULong, halfRoundingMode)
TO_FLOAT_CASE(ULong)
TO_DOUBLE_CASE(ULong)

Expand Down Expand Up @@ -969,7 +971,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
FLOAT_ROUND_CASE(kULong, ULong, roundType, saturate)
FLOAT_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)

TO_HALF_CASE(float)
TO_HALF_CASE(float, halfRoundingMode)

case kFloat:
memcpy(outRaw, inRaw, get_explicit_type_size(inType));
Expand Down Expand Up @@ -1003,7 +1005,7 @@ void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
DOUBLE_ROUND_CASE(kULong, ULong, roundType, saturate)
DOUBLE_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)

TO_HALF_CASE(double)
TO_HALF_CASE(double, halfRoundingMode)

TO_FLOAT_CASE(double);

Expand Down
3 changes: 3 additions & 0 deletions test_common/harness/conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include <string.h>
#include <sys/types.h>

#include <CL/cl_half.h>

/* Note: the next three all have to match in size and order!! */

enum ExplicitTypes
Expand Down Expand Up @@ -71,6 +73,7 @@ extern const char *get_explicit_type_name(ExplicitType type);
extern void convert_explicit_value(void *inRaw, void *outRaw,
ExplicitType inType, bool saturate,
RoundingType roundType,
cl_half_rounding_mode halfRoundingMode,
ExplicitType outType);

extern void generate_random_data(ExplicitType type, size_t count, MTdata d,
Expand Down
60 changes: 38 additions & 22 deletions test_conformance/basic/test_explicit_s2v.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,14 @@ using std::isnan;
#include <sys/stat.h>
#include <vector>

#include <CL/cl_half.h>

#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"

extern cl_half_rounding_mode halfRoundingMode;

namespace {

// clang-format off
Expand Down Expand Up @@ -123,53 +127,60 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue,
unsigned char *inPtr, *outPtr;
size_t paramSize, destTypeSize;

paramSize = get_explicit_type_size( srcType );
destTypeSize = get_explicit_type_size( destType );
paramSize = get_explicit_type_size(srcType);
destTypeSize = get_explicit_type_size(destType);

size_t destStride = destTypeSize * vecSize;
std::vector<char> outData(destStride * count);

streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
paramSize * count, inputData, &error);
test_error( error, "clCreateBuffer failed");
test_error(error, "clCreateBuffer failed");
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, destStride * count,
NULL, &error);
test_error( error, "clCreateBuffer failed");
test_error(error, "clCreateBuffer failed");

/* Set the arguments */
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
test_error(error, "Unable to set indexed kernel arguments");

/* Run the kernel */
threadSize[0] = count;

error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
test_error( error, "Unable to get work group size to use" );
error = get_max_common_work_group_size(context, kernel, threadSize[0],
&groupSize[0]);
test_error(error, "Unable to get work group size to use");

error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
test_error( error, "Unable to execute test kernel" );
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadSize,
groupSize, 0, NULL, NULL);
test_error(error, "Unable to execute test kernel");

/* Now verify the results. Each value should have been duplicated four times, and we should be able to just
/* Now verify the results. Each value should have been duplicated four
times, and we should be able to just
do a memcpy instead of relying on the actual type of data */
error =
clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, destStride * count,
outData.data(), 0, NULL, NULL);
test_error( error, "Unable to read output values!" );
test_error(error, "Unable to read output values!");

inPtr = (unsigned char *)inputData;
outPtr = (unsigned char *)outData.data();

for( i = 0; i < count; i++ )
for (i = 0; i < count; i++)
{
/* Convert the input data element to our output data type to compare against */
convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
/* Convert the input data element to our output data type to compare
* against */
convert_explicit_value((void *)inPtr, (void *)convertedData, srcType,
false, kDefaultRoundingType, halfRoundingMode,
destType);

/* Now compare every element of the vector */
for( s = 0; s < vecSize; s++ )
for (s = 0; s < vecSize; s++)
{
if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
if (memcmp(convertedData, outPtr + destTypeSize * s, destTypeSize)
!= 0)
{
bool isSrcNaN =
(((srcType == kHalf)
Expand All @@ -194,9 +205,14 @@ int test_explicit_s2v_function(cl_context context, cl_command_queue queue,
}

unsigned int *p = (unsigned int *)outPtr;
log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) );
log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
log_error("ERROR: Output value %d:%d does not validate for "
"size %d:%d!\n",
i, s, vecSize, (int)destTypeSize);
log_error(" Input: 0x%0*x\n", (int)(paramSize * 2),
*(unsigned int *)inPtr
& (0xffffffff >> (32 - paramSize * 8)));
log_error(" Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[0],
p[1], p[2], p[3]);
return -1;
}
}
Expand Down
5 changes: 4 additions & 1 deletion test_conformance/basic/test_vector_creation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

#include <CL/cl_half.h>

extern cl_half_rounding_mode halfRoundingMode;

#define DEBUG 0
#define DEPTH 16
// Limit the maximum code size for any given kernel.
Expand Down Expand Up @@ -320,7 +322,8 @@ int test_vector_creation(cl_device_id deviceID, cl_context context,
&j,
((char *)input_data_converted.data())
+ get_explicit_type_size(vecType[type_index]) * j,
kInt, 0, kRoundToEven, vecType[type_index]);
kInt, 0, kRoundToEven, halfRoundingMode,
vecType[type_index]);
}
}

Expand Down
49 changes: 27 additions & 22 deletions test_conformance/commonfns/test_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,28 +151,6 @@ struct MixTest : BaseFunctionTest
cl_int Run() override;
};

template <typename T> float UlpFn(const T &val, const double &r)
{
if (std::is_same<T, half>::value)
{
return Ulp_Error_Half(val, r);
}
else if (std::is_same<T, float>::value)
{
return Ulp_Error(val, r);
}
else if (std::is_same<T, double>::value)
{
return Ulp_Error_Double(val, r);
}
else
{
log_error("UlpFn: unsupported data type\n");
}

return -1.f; // wrong val
}

template <typename T> inline double conv_to_dbl(const T &val)
{
if (std::is_same<T, half>::value)
Expand Down Expand Up @@ -217,6 +195,33 @@ template <typename T> bool isfinite_fp(const T &v)
}
}

template <typename T> float UlpFn(const T &val, const double &r)
{
if (std::is_same<T, half>::value)
{
if (conv_to_half(r) == val)
{
return 0.0f;
}

return Ulp_Error_Half(val, r);
}
else if (std::is_same<T, float>::value)
{
return Ulp_Error(val, r);
}
else if (std::is_same<T, double>::value)
{
return Ulp_Error_Double(val, r);
}
else
{
log_error("UlpFn: unsupported data type\n");
}

return -1.f; // wrong val
}

template <class T>
int MakeAndRunTest(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements,
Expand Down

0 comments on commit b4c3bf2

Please sign in to comment.