From 7b83c8f7ab60ec5999bac8059da367f079464dc2 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Mon, 8 May 2023 19:20:05 +0200 Subject: [PATCH 1/8] Modernization of conversions test, preparation to handle cl_khr_fp16 extension --- .../conversions/basic_test_conversions.cpp | 3317 +++++++---------- .../conversions/basic_test_conversions.h | 382 +- .../conversions/conversions_data_info.h | 812 ++++ test_conformance/conversions/fplib.h | 5 + .../conversions/test_conversions.cpp | 1331 +------ 5 files changed, 2534 insertions(+), 3313 deletions(-) create mode 100644 test_conformance/conversions/conversions_data_info.h diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index dfb32279a..a01f60015 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -15,2243 +15,1514 @@ // #include "harness/testHarness.h" #include "harness/compat.h" +#include "harness/rounding_mode.h" +#include "harness/ThreadPool.h" +#include "harness/testHarness.h" +#include "harness/kernelHelpers.h" +#include "harness/mt19937.h" +#include "harness/kernelHelpers.h" -#include "basic_test_conversions.h" -#include -#include +#if defined(__APPLE__) +#include +#include +#endif -#include "harness/mt19937.h" +#if defined(__linux__) +#include +#include +#include +#endif +#if defined(__linux__) +#include +#include +#endif -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) -#include "fplib.h" +#if defined(__MINGW32__) +#include #endif -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) -/* Rounding modes and saturation for use with qcom 64 bit to float conversion library */ - bool qcom_sat; - roundingMode qcom_rm; +#include +#include +#include +#include +#if !defined(_WIN32) +#include +#include #endif +#include + +#include -static inline cl_ulong random64( MTdata d ); +#include +#include -#if defined (_WIN32) - #include - #include +#include "basic_test_conversions.h" + +#if (defined(_WIN32) && defined(_MSC_VER)) +// needed for _controlfp_s and rounding modes in RoundingMode +#include "harness/testHarness.h" +#endif + +#if defined(_WIN32) +#include +#include #else // !_WIN32 -#if defined (__SSE__ ) - #include +#if defined(__SSE__) +#include #endif -#if defined (__SSE2__ ) - #include +#if defined(__SSE2__) +#include #endif #endif // _WIN32 -const char *gTypeNames[ kTypeCount ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; - -const char *gRoundingModeNames[ kRoundingModeCount ] = { - "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; - -const char *gSaturationNames[ 2 ] = { "", "_sat" }; - -size_t gTypeSizes[ kTypeCount ] = { - sizeof( cl_uchar ), sizeof( cl_char ), - sizeof( cl_ushort ), sizeof( cl_short ), - sizeof( 
cl_uint ), sizeof( cl_int ), - sizeof( cl_float ), sizeof( cl_double ), - sizeof( cl_ulong ), sizeof( cl_long ), - }; - -long lrintf_clamped( float f ); -long lrintf_clamped( float f ) -{ - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; +cl_context gContext = NULL; +cl_command_queue gQueue = NULL; +int gStartTestNumber = -1; +int gEndTestNumber = 0; +#if defined(__APPLE__) +int gTimeResults = 1; +#else +int gTimeResults = 0; +#endif +int gReportAverageTimes = 0; +void *gIn = NULL; +void *gRef = NULL; +void *gAllowZ = NULL; +void *gOut[kCallStyleCount] = { NULL }; +cl_mem gInBuffer; +cl_mem gOutBuffers[kCallStyleCount]; +size_t gComputeDevices = 0; +uint32_t gDeviceFrequency = 0; +int gWimpyMode = 0; +int gWimpyReductionFactor = 128; +int gSkipTesting = 0; +int gForceFTZ = 0; +int gIsRTZ = 0; +uint32_t gSimdSize = 1; +int gHasDouble = 0; +int gTestDouble = 1; +const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" }; +int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 }; +int gMinVectorSize = 0; +int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]); +MTdata gMTdata; +const char **argList = NULL; +int argCount = 0; + + +double SubtractTime(uint64_t endTime, uint64_t startTime); + + +// clang-format off +// for readability sake keep this section unformatted + +std::vector DataInitInfo::specialValuesUInt = { + uint32_t(INT_MIN), uint32_t(INT_MIN + 1), uint32_t(INT_MIN + 2), + uint32_t(-(1 << 30) - 3), uint32_t(-(1 << 30) - 2), uint32_t(-(1 << 30) - 1), uint32_t(-(1 << 30)), + uint32_t(-(1 << 30) + 1), uint32_t(-(1 << 30) + 2), uint32_t(-(1 << 30) + 3), + uint32_t(-(1 << 24) - 3), uint32_t(-(1 << 24) - 2),uint32_t(-(1 << 24) - 1), + uint32_t(-(1 << 24)), uint32_t(-(1 << 24) + 1), uint32_t(-(1 << 24) + 2), uint32_t(-(1 << 24) + 3), + uint32_t(-(1 << 23) - 3), uint32_t(-(1 << 23) - 2),uint32_t(-(1 << 23) - 1), + uint32_t(-(1 << 23)), uint32_t(-(1 << 23) + 1), uint32_t(-(1 << 23) + 2), uint32_t(-(1 
<< 23) + 3), + uint32_t(-(1 << 22) - 3), uint32_t(-(1 << 22) - 2),uint32_t(-(1 << 22) - 1), + uint32_t(-(1 << 22)), uint32_t(-(1 << 22) + 1), uint32_t(-(1 << 22) + 2), uint32_t(-(1 << 22) + 3), + uint32_t(-(1 << 21) - 3), uint32_t(-(1 << 21) - 2),uint32_t(-(1 << 21) - 1), + uint32_t(-(1 << 21)), uint32_t(-(1 << 21) + 1), uint32_t(-(1 << 21) + 2), uint32_t(-(1 << 21) + 3), + uint32_t(-(1 << 16) - 3), uint32_t(-(1 << 16) - 2),uint32_t(-(1 << 16) - 1), + uint32_t(-(1 << 16)), uint32_t(-(1 << 16) + 1), uint32_t(-(1 << 16) + 2), uint32_t(-(1 << 16) + 3), + uint32_t(-(1 << 15) - 3), uint32_t(-(1 << 15) - 2),uint32_t(-(1 << 15) - 1), + uint32_t(-(1 << 15)), uint32_t(-(1 << 15) + 1), uint32_t(-(1 << 15) + 2), uint32_t(-(1 << 15) + 3), + uint32_t(-(1 << 8) - 3), uint32_t(-(1 << 8) - 2),uint32_t(-(1 << 8) - 1), + uint32_t(-(1 << 8)), uint32_t(-(1 << 8) + 1), uint32_t(-(1 << 8) + 2), uint32_t(-(1 << 8) + 3), + uint32_t(-(1 << 7) - 3), uint32_t(-(1 << 7) - 2),uint32_t(-(1 << 7) - 1), + uint32_t(-(1 << 7)), uint32_t(-(1 << 7) + 1), uint32_t(-(1 << 7) + 2), uint32_t(-(1 << 7) + 3), + uint32_t(-4), uint32_t(-3), uint32_t(-2), uint32_t(-1), 0, 1, 2, 3, 4, + (1 << 7) - 3,(1 << 7) - 2,(1 << 7) - 1, (1 << 7), (1 << 7) + 1, (1 << 7) + 2, (1 << 7) + 3, + (1 << 8) - 3,(1 << 8) - 2,(1 << 8) - 1, (1 << 8), (1 << 8) + 1, (1 << 8) + 2, (1 << 8) + 3, + (1 << 15) - 3,(1 << 15) - 2,(1 << 15) - 1, (1 << 15), (1 << 15) + 1, (1 << 15) + 2, (1 << 15) + 3, + (1 << 16) - 3,(1 << 16) - 2,(1 << 16) - 1, (1 << 16), (1 << 16) + 1, (1 << 16) + 2, (1 << 16) + 3, + (1 << 21) - 3,(1 << 21) - 2,(1 << 21) - 1, (1 << 21), (1 << 21) + 1, (1 << 21) + 2, (1 << 21) + 3, + (1 << 22) - 3,(1 << 22) - 2,(1 << 22) - 1, (1 << 22), (1 << 22) + 1, (1 << 22) + 2, (1 << 22) + 3, + (1 << 23) - 3,(1 << 23) - 2,(1 << 23) - 1, (1 << 23), (1 << 23) + 1, (1 << 23) + 2, (1 << 23) + 3, + (1 << 24) - 3,(1 << 24) - 2,(1 << 24) - 1, (1 << 24), (1 << 24) + 1, (1 << 24) + 2, (1 << 24) + 3, + (1 << 30) - 3,(1 << 30) - 2,(1 << 30) - 1, (1 
<< 30), (1 << 30) + 1, (1 << 30) + 2, (1 << 30) + 3, + INT_MAX - 3, INT_MAX - 2, INT_MAX - 1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above + UINT_MAX - 3, UINT_MAX - 2, UINT_MAX - 1, UINT_MAX +}; - if( f >= -(float) LONG_MIN ) - return LONG_MAX; +std::vector DataInitInfo::specialValuesFloat = { + -NAN, -INFINITY, -FLT_MAX, + MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), + MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), + MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), + -1000.f, -100.f, -4.0f, -3.5f, -3.0f, + MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, + MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, + MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, + MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, + MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, + MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), + MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), + MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), + MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), + MAKE_HEX_FLOAT(-0x0.000006p-126f, 
-0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), + MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, +NAN, +INFINITY, +FLT_MAX, + MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), + MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), + MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), + +1000.f, +100.f, +4.0f, +3.5f, +3.0f, + MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), +2.0f, + MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), + MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), + MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), + MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), + MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), + MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), + MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), + MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f +}; - if( f <= (float) LONG_MIN ) - return LONG_MIN; +// A 
table of more difficult cases to get right +std::vector DataInitInfo::specialValuesDouble = { + -NAN, -INFINITY, -DBL_MAX, + MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8), + MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12), + MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), + MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), + -1000., -100., -4.0, -3.5, -3.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), 
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), + -DBL_MIN, + MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), + -0.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59), + MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28), + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(+0x1.000002p32, 
+0x1000002LL, 8), + MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27), + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), + +1000., +100., +4.0, +3.5, +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), + +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), + +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), + +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), + +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), + +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, 
+0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0), + MAKE_HEX_DOUBLE(-0x1.0000000000001p52, 
-0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52), + MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0), + MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1), + MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53), + MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1), + MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2), + MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54), + MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2), + MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10), + MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), + MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10), +}; +// clang-format on - // Round fractional values to integer in round towards nearest mode - if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) - { - volatile float x = f; - float magicVal = magic[ f < 0 ]; - -#if defined( __SSE__ ) || defined (_WIN32) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); + +// Windows (since long double got deprecated) sets the x87 to 53-bit precision +// (that's x87 default state). 
This causes problems with the tests that +// convert long and ulong to float and double or otherwise deal with values +// that need more precision than 53-bit. So, set the x87 to 64-bit precision. +static inline void Force64BitFPUPrecision(void) +{ +#if __MINGW32__ + // The usual method is to use _controlfp as follows: + // #include + // _controlfp(_PC_64, _MCW_PC); + // + // _controlfp is available on MinGW32 but not on MinGW64. Instead of having + // divergent code just use inline assembly which works for both. + unsigned short int orig_cw = 0; + unsigned short int new_cw = 0; + __asm__ __volatile__("fstcw %0" : "=m"(orig_cw)); + new_cw = orig_cw | 0x0300; // set precision to 64-bit + __asm__ __volatile__("fldcw %0" ::"m"(new_cw)); #else - x += magicVal; - x -= magicVal; + /* Implement for other platforms if needed */ #endif - f = x; +} + + +template +int CalcRefValsPat::check_result(void *test, + uint32_t count, + int vectorSize) +{ + const cl_uchar *a = (const cl_uchar *)gAllowZ; + + if (std::is_integral::value) + { // char/uchar/short/ushort/int/uint/long/ulong + const OutType *t = (const OutType *)test; + const OutType *c = (const OutType *)gRef; + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (OutType)0)) + { + size_t s = sizeof(OutType) * 2; + std::stringstream sstr; + sstr << "\nError for vector size %d found at 0x%8.8x: *0x%" + << s << "." << s << "x vs 0x%" << s << "." 
<< s << "x\n"; + vlog(sstr.str().c_str(), vectorSize, i, c[i], t[i]); + return i + 1; + } + } + else if (std::is_same::value) + { + // cast to integral - from original test + const cl_uint *t = (const cl_uint *)test; + const cl_uint *c = (const cl_uint *)gRef; + + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && + // Allow nan's to be binary different + !((t[i] & 0x7fffffffU) > 0x7f800000U + && (c[i] & 0x7fffffffU) > 0x7f800000U) + && !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x80000000U))) + { + vlog( + "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]); + return i + 1; + } + } + else + { + const cl_ulong *t = (const cl_ulong *)test; + const cl_ulong *c = (const cl_ulong *)gRef; + + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && + // Allow nan's to be binary different (binary64 NaN threshold on both sides) + !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL + && (c[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL) + && !(a[i] != (cl_uchar)0 + && t[i] == (c[i] & 0x8000000000000000ULL))) + { + vlog( + "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]); + return i + 1; + } } - return (long) f; + return 0; } -long long llrintf_clamped( float f ); -long long llrintf_clamped( float f ) + +cl_uint RoundUpToNextPowerOfTwo(cl_uint x) { - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + if (0 == (x & (x - 1))) return x; - if( f >= -(float) LLONG_MIN ) - return LLONG_MAX; + while (x & (x - 1)) x &= x - 1; - if( f <= (float) LLONG_MIN ) - return LLONG_MIN; + return x + x; +} + + +cl_int CustomConversionsTest::Run() +{ + int startMinVectorSize = gMinVectorSize; + Type inType, outType; + RoundingMode round; + SaturationMode sat; - // Round fractional values to integer in round towards nearest mode - if( fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) ) + for (int i = 0; i < argCount; 
i++) { - volatile float x = f; - float magicVal = magic[ f < 0 ]; -#if defined( __SSE__ ) || defined (_WIN32) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + if (conv_test::GetTestCase(argList[i], &outType, &inType, &sat, &round)) + { + vlog_error("\n\t\t**** ERROR: Unable to parse function name " + "%s. Skipping.... *****\n\n", + argList[i]); + continue; + } + + // skip double if we don't have it + if (!gTestDouble && (inType == kdouble || outType == kdouble)) + { + if (gHasDouble) + { + vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); + vlog("\t\tcl_khr_fp64 enabled, but double testing turned " + "off.\n"); + } + continue; + } + + // skip longs on embedded + if (!gHasLong + && (inType == klong || outType == klong || inType == kulong + || outType == kulong)) + { + continue; + } + + // Skip the implicit converts if the rounding mode is not default or + // test is saturated + if (0 == startMinVectorSize) + { + if (sat || round != kDefaultRoundingMode) + gMinVectorSize = 1; + else + gMinVectorSize = 0; + } + + IterOverSelectedTypes iter(typeIterator, *this, inType, outType); + + iter.Run(); + + if (gFailCount) + { + vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); + } } - return (long long) f; + return gFailCount; +} + + +ConversionsTest::ConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : context(context), device(device), queue(queue), num_elements(0), + typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0), + cl_uint(0), cl_int(0), cl_float(0), cl_double(0), + 
cl_ulong(0), cl_long(0) }) +{} + + +cl_int ConversionsTest::Run() +{ + IterOverTypes iter(typeIterator, *this); + + iter.Run(); + + return gFailCount; } -long lrint_clamped( double f ); -long lrint_clamped( double f ) + +cl_int ConversionsTest::SetUp(int elements) { - static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + num_elements = elements; + return CL_SUCCESS; +} - if( sizeof( long ) > 4 ) + +template +void ConversionsTest::TestTypesConversion(const Type &inType, + const Type &outType, int &testNumber) +{ + SaturationMode sat; + RoundingMode round; + int error; + int startMinVectorSize = gMinVectorSize; + + // skip longs on embedded + if (!gHasLong + && (inType == klong || outType == klong || inType == kulong + || outType == kulong)) { - if( f >= -(double) LONG_MIN ) - return LONG_MAX; + return; } - else + + for (sat = (SaturationMode)0; sat < kSaturationModeCount; + sat = (SaturationMode)(sat + 1)) { - if( f >= LONG_MAX ) - return LONG_MAX; - } + // skip illegal saturated conversions to float type + if (kSaturated == sat && (outType == kfloat || outType == kdouble)) + { + continue; + } + + for (round = (RoundingMode)0; round < kRoundingModeCount; + round = (RoundingMode)(round + 1)) + { + if (++testNumber < gStartTestNumber) + { + continue; + } + else + { + if (gEndTestNumber > 0 && testNumber >= gEndTestNumber) return; + } - if( f <= (double) LONG_MIN ) - return LONG_MIN; + vlog("%d) Testing convert_%sn%s%s( %sn ):\n", testNumber, + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); - // Round fractional values to integer in round towards nearest mode - if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) - { - volatile double x = f; - double magicVal = magic[ f < 0 ]; -#if defined( __SSE2__ ) || defined (_MSC_VER) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128d v = _mm_set_sd( x ); - __m128d m = _mm_set_sd( 
magicVal ); - v = _mm_add_sd( v, m ); - v = _mm_sub_sd( v, m ); - _mm_store_sd( (double*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; - } + // skip double if we don't have it + if (!gTestDouble && (inType == kdouble || outType == kdouble)) + { + if (gHasDouble) + { + vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " + "FAILED ** \n", + testNumber, gTypeNames[outType], + gSaturationNames[sat], gRoundingModeNames[round], + gTypeNames[inType]); + vlog("\t\tcl_khr_fp64 enabled, but double " + "testing turned off.\n"); + } + continue; + } - return (long) f; + // Skip the implicit converts if the rounding mode is + // not default or test is saturated + if (0 == startMinVectorSize) + { + if (sat || round != kDefaultRoundingMode) + gMinVectorSize = 1; + else + gMinVectorSize = 0; + } + + if ((error = DoTest(outType, inType, + sat, round))) + { + vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " + "FAILED ** \n", + testNumber, gTypeNames[outType], + gSaturationNames[sat], gRoundingModeNames[round], + gTypeNames[inType]); + } + } + } } -long long llrint_clamped( double f ); -long long llrint_clamped( double f ) + +template +int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, + RoundingMode round) { - static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; +#ifdef __APPLE__ + cl_ulong wall_start = mach_absolute_time(); +#endif - if( f >= -(double) LLONG_MIN ) - return LLONG_MAX; +#if 0 + uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]); +#else + cl_uint threads = GetThreadCount(); + uint64_t lastCase = 1000000ULL; +#endif - if( f <= (double) LLONG_MIN ) - return LLONG_MIN; + DataInitInfo info = { 0, 0, outType, inType, sat, round, threads }; + DataInfoSpec init_info(info); + WriteInputBufferInfo writeInputBufferInfo; + int vectorSize; + int error = 0; + uint64_t i; - // Round fractional values to integer in round towards nearest mode - if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 
0x1LL, 52) ) + gTestCount++; + size_t blockCount = + BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); + size_t step = blockCount; + + for (i = 0; i < threads; i++) { - volatile double x = f; - double magicVal = magic[ f < 0 ]; -#if defined( __SSE2__ ) || defined (_MSC_VER) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128d v = _mm_set_sd( x ); - __m128d m = _mm_set_sd( magicVal ); - v = _mm_add_sd( v, m ); - v = _mm_sub_sd( v, m ); - _mm_store_sd( (double*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + init_info.mdv.emplace_back(MTdataHolder(gRandomSeed)); } - return (long long) f; -} + writeInputBufferInfo.outType = outType; + writeInputBufferInfo.inType = inType; + writeInputBufferInfo.calcInfo.resize(gMaxVectorSize); + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + writeInputBufferInfo.calcInfo[vectorSize].reset( + new CalcRefValsPat()); + writeInputBufferInfo.calcInfo[vectorSize]->program = + conv_test::MakeProgram( + outType, inType, sat, round, vectorSize, + &writeInputBufferInfo.calcInfo[vectorSize]->kernel); + if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->program) + { + gFailCount++; + return -1; + } + if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->kernel) + { + gFailCount++; + vlog_error("\t\tFAILED -- Failed to create kernel.\n"); + return -2; + } -/* - Names created as: - - #include + writeInputBufferInfo.calcInfo[vectorSize]->parent = + &writeInputBufferInfo; + writeInputBufferInfo.calcInfo[vectorSize]->vectorSize = vectorSize; + writeInputBufferInfo.calcInfo[vectorSize]->result = -1; + } - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; + if (gSkipTesting) return error; - int main( void ) + // Patch up rounding mode if default is RTZ + // We leave the part above in default rounding mode so that the right kernel + // is compiled. 
+ if (std::is_same::value) { + if (round == kDefaultRoundingMode && gIsRTZ) + init_info.round = round = kRoundTowardZero; + } - int i,j; +#if 0 + // Figure out how many elements are in a work block + // we handle 64-bit types a bit differently. + if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL; +#endif - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) - { - if( j == i ) - continue; + if (!gWimpyMode && gIsEmbedded) + step = blockCount * EMBEDDED_REDUCTION_FACTOR; - vlog( "void %s2%s( void *, void *);\n", names[i], names[j] ); - } + if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor; + vlog("Testing... "); + fflush(stdout); + for (i = 0; i < (uint64_t)lastCase; i += step) + { + if (0 == (i & ((lastCase >> 3) - 1))) + { + vlog("."); + fflush(stdout); + } - return 0; - } -*/ - -static float my_fabsf( float x ); -static double my_fabs( double x ); - - - -static void uchar2char( void *, void *); -static void uchar2ushort( void *, void *); -static void uchar2short( void *, void *); -static void uchar2uint( void *, void *); -static void uchar2int( void *, void *); -static void uchar2float( void *, void *); -static void uchar2double( void *, void *); -static void uchar2ulong( void *, void *); -static void uchar2long( void *, void *); -static void char2uchar( void *, void *); -static void char2ushort( void *, void *); -static void char2short( void *, void *); -static void char2uint( void *, void *); -static void char2int( void *, void *); -static void char2float( void *, void *); -static void char2double( void *, void *); -static void char2ulong( void *, void *); -static void char2long( void *, void *); -static void ushort2uchar( void *, void *); -static void ushort2char( void *, void *); -static void ushort2short( void *, void *); -static void ushort2uint( void *, void *); -static void ushort2int( void *, void *); -static void ushort2float( void *, void *); -static 
void ushort2double( void *, void *); -static void ushort2ulong( void *, void *); -static void ushort2long( void *, void *); -static void short2uchar( void *, void *); -static void short2char( void *, void *); -static void short2ushort( void *, void *); -static void short2uint( void *, void *); -static void short2int( void *, void *); -static void short2float( void *, void *); -static void short2double( void *, void *); -static void short2ulong( void *, void *); -static void short2long( void *, void *); -static void uint2uchar( void *, void *); -static void uint2char( void *, void *); -static void uint2ushort( void *, void *); -static void uint2short( void *, void *); -static void uint2int( void *, void *); -static void uint2float( void *, void *); -static void uint2double( void *, void *); -static void uint2ulong( void *, void *); -static void uint2long( void *, void *); -static void int2uchar( void *, void *); -static void int2char( void *, void *); -static void int2ushort( void *, void *); -static void int2short( void *, void *); -static void int2uint( void *, void *); -static void int2float( void *, void *); -static void int2double( void *, void *); -static void int2ulong( void *, void *); -static void int2long( void *, void *); -static void float2uchar( void *, void *); -static void float2char( void *, void *); -static void float2ushort( void *, void *); -static void float2short( void *, void *); -static void float2uint( void *, void *); -static void float2int( void *, void *); -static void float2double( void *, void *); -static void float2ulong( void *, void *); -static void float2long( void *, void *); -static void double2uchar( void *, void *); -static void double2char( void *, void *); -static void double2ushort( void *, void *); -static void double2short( void *, void *); -static void double2uint( void *, void *); -static void double2int( void *, void *); -static void double2float( void *, void *); -static void double2ulong( void *, void *); -static void 
double2long( void *, void *); -static void ulong2uchar( void *, void *); -static void ulong2char( void *, void *); -static void ulong2ushort( void *, void *); -static void ulong2short( void *, void *); -static void ulong2uint( void *, void *); -static void ulong2int( void *, void *); -static void ulong2float( void *, void *); -static void ulong2double( void *, void *); -static void ulong2long( void *, void *); -static void long2uchar( void *, void *); -static void long2char( void *, void *); -static void long2ushort( void *, void *); -static void long2short( void *, void *); -static void long2uint( void *, void *); -static void long2int( void *, void *); -static void long2float( void *, void *); -static void long2double( void *, void *); -static void long2ulong( void *, void *); - -/* - Conversion list created as - - #include - - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - - int main( void ) - { + cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); + writeInputBufferInfo.count = count; - int i,j; + // Crate a user event to represent the status of the reference value + // computation completion + writeInputBufferInfo.calcReferenceValues = + clCreateUserEvent(gContext, &error); + if (error || NULL == writeInputBufferInfo.calcReferenceValues) + { + vlog_error("ERROR: Unable to create user event. (%d)\n", error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + // retain for consumption by MapOutputBufferComplete + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) { - vlog( "{ " ); - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + if ((error = + clRetainEvent(writeInputBufferInfo.calcReferenceValues))) { - if( j == i ) - vlog( " NULL, " ); - else - { - char s[64]; - sprintf( s, "%s2%s,", names[j], names[i] ); - vlog( "%15s ", s ); - } + vlog_error("ERROR: Unable to retain user event. 
(%d)\n", error); + gFailCount++; + return error; } - vlog( "},\n" ); } - return 0; - } - - */ -/* -Convert gConversions[kTypeCount][kTypeCount] = { -{ NULL, char2uchar, ushort2uchar, short2uchar, uint2uchar, int2uchar, float2uchar, double2uchar, ulong2uchar, long2uchar, }, -{ uchar2char, NULL, ushort2char, short2char, uint2char, int2char, float2char, double2char, ulong2char, long2char, }, -{ uchar2ushort, char2ushort, NULL, short2ushort, uint2ushort, int2ushort, float2ushort, double2ushort, ulong2ushort, long2ushort, }, -{ uchar2short, char2short, ushort2short, NULL, uint2short, int2short, float2short, double2short, ulong2short, long2short, }, -{ uchar2uint, char2uint, ushort2uint, short2uint, NULL, int2uint, float2uint, double2uint, ulong2uint, long2uint, }, -{ uchar2int, char2int, ushort2int, short2int, uint2int, NULL, float2int, double2int, ulong2int, long2int, }, -{ uchar2float, char2float, ushort2float, short2float, uint2float, int2float, NULL, double2float, ulong2float, long2float, }, -{ uchar2double, char2double, ushort2double, short2double, uint2double, int2double, float2double, NULL, ulong2double, long2double, }, -{ uchar2ulong, char2ulong, ushort2ulong, short2ulong, uint2ulong, int2ulong, float2ulong, double2ulong, NULL, long2ulong, }, -{ uchar2long, char2long, ushort2long, short2long, uint2long, int2long, float2long, double2long, ulong2long, NULL, } }; -*/ - -static void uchar2char_sat( void *, void *); -static void uchar2ushort_sat( void *, void *); -static void uchar2short_sat( void *, void *); -static void uchar2uint_sat( void *, void *); -static void uchar2int_sat( void *, void *); -static void uchar2float_sat( void *, void *); -static void uchar2double_sat( void *, void *); -static void uchar2ulong_sat( void *, void *); -static void uchar2long_sat( void *, void *); -static void char2uchar_sat( void *, void *); -static void char2ushort_sat( void *, void *); -static void char2short_sat( void *, void *); -static void char2uint_sat( void *, void *); 
-static void char2int_sat( void *, void *); -static void char2float_sat( void *, void *); -static void char2double_sat( void *, void *); -static void char2ulong_sat( void *, void *); -static void char2long_sat( void *, void *); -static void ushort2uchar_sat( void *, void *); -static void ushort2char_sat( void *, void *); -static void ushort2short_sat( void *, void *); -static void ushort2uint_sat( void *, void *); -static void ushort2int_sat( void *, void *); -static void ushort2float_sat( void *, void *); -static void ushort2double_sat( void *, void *); -static void ushort2ulong_sat( void *, void *); -static void ushort2long_sat( void *, void *); -static void short2uchar_sat( void *, void *); -static void short2char_sat( void *, void *); -static void short2ushort_sat( void *, void *); -static void short2uint_sat( void *, void *); -static void short2int_sat( void *, void *); -static void short2float_sat( void *, void *); -static void short2double_sat( void *, void *); -static void short2ulong_sat( void *, void *); -static void short2long_sat( void *, void *); -static void uint2uchar_sat( void *, void *); -static void uint2char_sat( void *, void *); -static void uint2ushort_sat( void *, void *); -static void uint2short_sat( void *, void *); -static void uint2int_sat( void *, void *); -static void uint2float_sat( void *, void *); -static void uint2double_sat( void *, void *); -static void uint2ulong_sat( void *, void *); -static void uint2long_sat( void *, void *); -static void int2uchar_sat( void *, void *); -static void int2char_sat( void *, void *); -static void int2ushort_sat( void *, void *); -static void int2short_sat( void *, void *); -static void int2uint_sat( void *, void *); -static void int2float_sat( void *, void *); -static void int2double_sat( void *, void *); -static void int2ulong_sat( void *, void *); -static void int2long_sat( void *, void *); -static void float2uchar_sat( void *, void *); -static void float2char_sat( void *, void *); -static void 
float2ushort_sat( void *, void *); -static void float2short_sat( void *, void *); -static void float2uint_sat( void *, void *); -static void float2int_sat( void *, void *); -static void float2double_sat( void *, void *); -static void float2ulong_sat( void *, void *); -static void float2long_sat( void *, void *); -static void double2uchar_sat( void *, void *); -static void double2char_sat( void *, void *); -static void double2ushort_sat( void *, void *); -static void double2short_sat( void *, void *); -static void double2uint_sat( void *, void *); -static void double2int_sat( void *, void *); -static void double2float_sat( void *, void *); -static void double2ulong_sat( void *, void *); -static void double2long_sat( void *, void *); -static void ulong2uchar_sat( void *, void *); -static void ulong2char_sat( void *, void *); -static void ulong2ushort_sat( void *, void *); -static void ulong2short_sat( void *, void *); -static void ulong2uint_sat( void *, void *); -static void ulong2int_sat( void *, void *); -static void ulong2float_sat( void *, void *); -static void ulong2double_sat( void *, void *); -static void ulong2long_sat( void *, void *); -static void long2uchar_sat( void *, void *); -static void long2char_sat( void *, void *); -static void long2ushort_sat( void *, void *); -static void long2short_sat( void *, void *); -static void long2uint_sat( void *, void *); -static void long2int_sat( void *, void *); -static void long2float_sat( void *, void *); -static void long2double_sat( void *, void *); -static void long2ulong_sat( void *, void *); -/* - #include - - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - - int main( void ) - { + // Crate a user event to represent when the callbacks are done verifying + // correctness + writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error); + if (error || NULL == writeInputBufferInfo.doneBarrier) + { + vlog_error("ERROR: Unable to create 
user event for barrier. (%d)\n", + error); + gFailCount++; + return error; + } - int i,j; + // retain for use by the callback that calls this + if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier))) + { + vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n", + error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + // Call this in a multithreaded manner + cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2; + init_info.start = i; + init_info.size = count / chunks; + if (init_info.size < 16384) { - vlog( "{ " ); - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + chunks = RoundUpToNextPowerOfTwo(threads); + init_info.size = count / chunks; + if (init_info.size < 16384) { - if( j == i ) - vlog( " NULL, " ); - else - { - char s[64]; - sprintf( s, "%s2%s_sat,", names[j], names[i] ); - vlog( "%18s ", s ); - } + init_info.size = count; + chunks = 1; } - vlog( "},\n" ); } - return 0; - } + ThreadPool_Do(conv_test::InitData, chunks, &init_info); -Convert gSaturatedConversions[kTypeCount][kTypeCount] = { -{ NULL, char2uchar_sat, ushort2uchar_sat, short2uchar_sat, uint2uchar_sat, int2uchar_sat, float2uchar_sat, double2uchar_sat, ulong2uchar_sat, long2uchar_sat, }, -{ uchar2char_sat, NULL, ushort2char_sat, short2char_sat, uint2char_sat, int2char_sat, float2char_sat, double2char_sat, ulong2char_sat, long2char_sat, }, -{ uchar2ushort_sat, char2ushort_sat, NULL, short2ushort_sat, uint2ushort_sat, int2ushort_sat, float2ushort_sat, double2ushort_sat, ulong2ushort_sat, long2ushort_sat, }, -{ uchar2short_sat, char2short_sat, ushort2short_sat, NULL, uint2short_sat, int2short_sat, float2short_sat, double2short_sat, ulong2short_sat, long2short_sat, }, -{ uchar2uint_sat, char2uint_sat, ushort2uint_sat, short2uint_sat, NULL, int2uint_sat, float2uint_sat, double2uint_sat, ulong2uint_sat, long2uint_sat, }, -{ uchar2int_sat, char2int_sat, ushort2int_sat, short2int_sat, uint2int_sat, NULL, float2int_sat, 
double2int_sat, ulong2int_sat, long2int_sat, }, -{ uchar2float_sat, char2float_sat, ushort2float_sat, short2float_sat, uint2float_sat, int2float_sat, NULL, double2float_sat, ulong2float_sat, long2float_sat, }, -{ uchar2double_sat, char2double_sat, ushort2double_sat, short2double_sat, uint2double_sat, int2double_sat, float2double_sat, NULL, ulong2double_sat, long2double_sat, }, -{ uchar2ulong_sat, char2ulong_sat, ushort2ulong_sat, short2ulong_sat, uint2ulong_sat, int2ulong_sat, float2ulong_sat, double2ulong_sat, NULL, long2ulong_sat, }, -{ uchar2long_sat, char2long_sat, ushort2long_sat, short2long_sat, uint2long_sat, int2long_sat, float2long_sat, double2long_sat, ulong2long_sat, NULL, } -}; -*/ + // Copy the results to the device + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, + count * gTypeSizes[inType], gIn, 0, + NULL, NULL))) + { + vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error); + gFailCount++; + return error; + } -/* - #include + // Call completion callback for the write, which will enqueue the rest + // of the work. 
+ conv_test::WriteInputBufferComplete((void *)&writeInputBufferInfo); - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - const char *types[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "llong" }; + // Make sure the work is actually running, so we don't deadlock + if ((error = clFlush(gQueue))) + { + vlog_error("clFlush failed with error %d\n", error); + gFailCount++; + return error; + } - int main( void ) - { + ThreadPool_Do(conv_test::PrepareReference, chunks, &init_info); - int i,j; + // signal we are done calculating the reference results + if ((error = clSetUserEventStatus( + writeInputBufferInfo.calcReferenceValues, CL_COMPLETE))) + { + vlog_error( + "Error: Failed to set user event status to CL_COMPLETE: %d\n", + error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) - { - if( j == i ) - continue; + // Wait for the event callbacks to finish verifying correctness. 
+ if ((error = clWaitForEvents( + 1, (cl_event *)&writeInputBufferInfo.doneBarrier))) + { + vlog_error("Error: Failed to wait for barrier: %d\n", error); + gFailCount++; + return error; + } + + if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues))) + { + vlog_error("Error: Failed to release calcReferenceValues: %d\n", + error); + gFailCount++; + return error; + } + + if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier))) + { + vlog_error("Error: Failed to release done barrier: %d\n", error); + gFailCount++; + return error; + } - switch( i ) + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) + { + if ((error = writeInputBufferInfo.calcInfo[vectorSize]->result)) + { + switch (inType) { - case 6: //float - if( j == 7 ) - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); - else - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) my_rintf(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] ); + case kuchar: + case kchar: + vlog("Input value: 0x%2.2x ", + ((unsigned char *)gIn)[error - 1]); + break; + case kushort: + case kshort: + vlog("Input value: 0x%4.4x ", + ((unsigned short *)gIn)[error - 1]); break; - case 7: //double - if( j == 6 ) - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); - else - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) rint(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] ); + case kuint: + case kint: + vlog("Input value: 0x%8.8x ", + ((unsigned int *)gIn)[error - 1]); + break; + case kfloat: + vlog("Input value: %a ", ((float *)gIn)[error - 1]); + break; + case kulong: + case klong: + vlog("Input value: 0x%16.16llx ", + ((unsigned long long *)gIn)[error - 1]); + break; + case kdouble: + vlog("Input value: %a ", ((double *)gIn)[error - 1]); break; 
default: - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) - ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); + vlog_error("Internal error at %s: %d\n", __FILE__, + __LINE__); + abort(); break; } - } + // tell the user which conversion it was. + if (0 == vectorSize) + vlog(" (implicit scalar conversion from %s to %s)\n", + gTypeNames[inType], gTypeNames[outType]); + else + vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType], + sizeNames[vectorSize], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType], + sizeNames[vectorSize]); - return 0; + gFailCount++; + return error; + } + } } -*/ -float my_fabsf( float x ) -{ - union{ cl_uint u; float f; }u; - u.f = x; - u.u &= 0x7fffffff; - return u.f; -} + log_info("done.\n"); -double my_fabs( double x ) -{ - union{ cl_ulong u; double f; }u; - u.f = x; - u.u &= 0x7fffffffffffffffULL; - return u.f; -} + if (gTimeResults) + { + // Kick off tests for the various vector lengths + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) + { + size_t workItemCount = blockCount / vectorSizes[vectorSize]; + if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4) + workItemCount /= + 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]); + + double sum = 0.0; + double bestTime = INFINITY; + cl_uint k; + for (k = 0; k < PERF_LOOP_COUNT; k++) + { + uint64_t startTime = conv_test::GetTime(); + if ((error = conv_test::RunKernel( + writeInputBufferInfo.calcInfo[vectorSize]->kernel, + gInBuffer, gOutBuffers[vectorSize], workItemCount))) + { + gFailCount++; + return error; + } -static float my_rintf( float f ); -static float my_rintf( float f ) -{ - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + // Make sure OpenCL is done + if ((error = clFinish(gQueue))) + { + vlog_error("Error %d at clFinish\n", error); + return error; + } - // Round fractional values to integer in round towards nearest mode - 
if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) - { - volatile float x = f; - float magicVal = magic[ f < 0 ]; - -#if defined( __SSE__ ) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + uint64_t endTime = conv_test::GetTime(); + double time = SubtractTime(endTime, startTime); + sum += time; + if (time < bestTime) bestTime = time; + } + + if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double)gDeviceFrequency + * gComputeDevices * gSimdSize * 1e6 + / (workItemCount * vectorSizes[vectorSize]); + if (0 == vectorSize) + vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", + "implicit convert %s -> %s", gTypeNames[inType], + gTypeNames[outType]); + else + vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", + "convert_%s%s%s%s( %s%s )", gTypeNames[outType], + sizeNames[vectorSize], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType], + sizeNames[vectorSize]); + } } - return f; -} + if (gWimpyMode) + vlog("\tWimp pass"); + else + vlog("\tpassed"); -static void uchar2char( void *out, void *in){ ((char*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2short( void *out, void *in){ ((short*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2int( void *out, void *in){ ((int*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2float( void *out, void *in) -{ - cl_uchar l = ((cl_uchar*) in)[0]; - ((float*) out)[0] = (l == 0 ? 
0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uchar2double( void *out, void *in) -{ - cl_uchar l = ((cl_uchar*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uchar2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; } -static void char2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_char*) in)[0]; } -static void char2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_char*) in)[0]; } -static void char2short( void *out, void *in){ ((short*) out)[0] = ((cl_char*) in)[0]; } -static void char2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_char*) in)[0]; } -static void char2int( void *out, void *in){ ((int*) out)[0] = ((cl_char*) in)[0]; } -static void char2float( void *out, void *in) -{ - cl_char l = ((cl_char*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void char2double( void *out, void *in) -{ - cl_char l = ((cl_char*) in)[0]; - ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void char2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_char*) in)[0]; } -static void char2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; } -static void ushort2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2char( void *out, void *in){ ((char*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2short( void *out, void *in){ ((short*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2int( void *out, void *in){ ((int*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2float( void *out, void *in) -{ - cl_ushort l = ((cl_ushort*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void ushort2double( void *out, void *in) -{ - cl_ushort l = ((cl_ushort*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void ushort2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; } -static void short2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_short*) in)[0]; } -static void short2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_short*) in)[0]; } -static void short2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_short*) in)[0]; } -static void short2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_short*) in)[0]; } -static void short2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; } -static void short2float( void *out, void *in) -{ - cl_short l = ((cl_short*) in)[0]; - ((float*) out)[0] = (l == 0 ? 
0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void short2double( void *out, void *in) -{ - cl_short l = ((cl_short*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void short2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_short*) in)[0]; } -static void short2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; } -static void uint2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2short( void *out, void *in){ ((short*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2float( void *out, void *in) -{ - // Use volatile to prevent optimization by Clang compiler - volatile cl_uint l = ((cl_uint *)in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uint2double( void *out, void *in) -{ - cl_uint l = ((cl_uint*) in)[0]; - ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uint2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; } -static void int2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_int*) in)[0]; } -static void int2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_int*) in)[0]; } -static void int2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_int*) in)[0]; } -static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) in)[0]; } -static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; } -static void int2float( void *out, void *in) -{ - // Use volatile to prevent optimization by Clang compiler - volatile cl_int l = ((cl_int *)in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void int2double( void *out, void *in) -{ - cl_int l = ((cl_int*) in)[0]; - ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void int2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_int*) in)[0]; } -static void int2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; } -static void float2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2char( void *out, void *in){ ((cl_char*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2short( void *out, void *in){ ((cl_short*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2uint( void *out, void *in){ ((cl_uint*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2int( void *out, void *in){ ((cl_int*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2double( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; } -static void float2ulong( void *out, void *in) -{ -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) - // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int. - // However, fistp stores it as a signed int, and some of the test values won't - // fit into a signed int. (These test values are >= 2^63.) The result on VS2005 - // is that these end up silently (at least by default settings) clamped to - // the max lowest ulong. 
- cl_float x = my_rintf(((cl_float *)in)[0]); - if (x >= 9223372036854775808.0f) { - x -= 9223372036854775808.0f; - ((cl_ulong*) out)[0] = x; - ((cl_ulong*) out)[0] += 9223372036854775808ULL; - } else { - ((cl_ulong*) out)[0] = x; - } -#else - ((cl_ulong*) out)[0] = my_rintf(((cl_float*) in)[0]); +#ifdef __APPLE__ + // record the run time + vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start)); #endif -} + vlog("\n\n"); + fflush(stdout); -static void float2long( void *out, void *in){ ((cl_long*) out)[0] = llrint_clamped( ((cl_float*) in)[0] ); } -static void double2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2char( void *out, void *in){ ((cl_char*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2short( void *out, void *in){ ((cl_short*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) rint(((cl_double*) in)[0]); } -static void double2int( void *out, void *in){ ((cl_int*) out)[0] = (int) rint(((cl_double*) in)[0]); } -static void double2float( void *out, void *in){ ((cl_float*) out)[0] = (float) ((cl_double*) in)[0]; } -static void double2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = (cl_ulong) rint(((cl_double*) in)[0]); } -static void double2long( void *out, void *in){ ((cl_long*) out)[0] = (cl_long) rint(((cl_double*) in)[0]); } -static void ulong2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_ulong*) in)[0]; } -static void ulong2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_ulong*) in)[0]; } -static void ulong2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_ulong*) in)[0]; } -static void ulong2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short)((cl_ulong*) in)[0]; } -static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = 
(cl_uint) ((cl_ulong*) in)[0]; } -static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; } -static void ulong2float( void *out, void *in) -{ -#if defined(_MSC_VER) && defined(_M_X64) - cl_ulong l = ((cl_ulong*) in)[0]; - float result; - cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; - _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); - ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result)); -#else - cl_ulong l = ((cl_ulong*) in)[0]; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point emulation - * code despite which -mfloat-abi setting it is. But the emulation code in - * libgcc.a has only one rounding mode (round to nearest even in this case) - * and ignores the user rounding mode setting in hardware. - * As a result setting rounding modes in hardware won't give correct - * rounding results for type covert from 64-bit integer to float using GCC - * for ARM compiler so for testing different rounding modes, we need to use - * alternative reference function. ARM64 does have an instruction, however - * we cannot guarantee the compiler will use it. On all ARM architechures - * use emulation to calculate reference.*/ - ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm); -#else - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#endif -#endif + return error; } -static void ulong2double( void *out, void *in) -{ -#if defined(_MSC_VER) - cl_ulong l = ((cl_ulong*) in)[0]; - double result; - cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; -#if defined(_M_X64) - _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl)); -#else - result = sl; -#endif - ((double*) out)[0] = (l == 0 ? 0.0 : (((cl_long)l < 0) ? 
result * 2.0 : result)); -#else - // Use volatile to prevent optimization by Clang compiler - volatile cl_ulong l = ((cl_ulong *)in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +#if !defined(__APPLE__) +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); #endif -} -static void ulong2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ulong*) in)[0]; } -static void long2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_long*) in)[0]; } -static void long2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_long*) in)[0]; } -static void long2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_long*) in)[0]; } -static void long2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short) ((cl_long*) in)[0]; } -static void long2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_long*) in)[0]; } -static void long2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_long*) in)[0]; } -static void long2float( void *out, void *in) -{ -#if defined(_MSC_VER) && defined(_M_X64) - cl_long l = ((cl_long*) in)[0]; - float result; - _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l)); - ((float*) out)[0] = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#else - cl_long l = ((cl_long*) in)[0]; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point emulation - * code despite which -mfloat-abi setting it is. But the emulation code in - * libgcc.a has only one rounding mode (round to nearest even in this case) - * and ignores the user rounding mode setting in hardware. 
- * As a result setting rounding modes in hardware won't give correct - * rounding results for type covert from 64-bit integer to float using GCC - * for ARM compiler so for testing different rounding modes, we need to use - * alternative reference function. ARM64 does have an instruction, however - * we cannot guarantee the compiler will use it. On all ARM architechures - * use emulation to calculate reference.*/ - ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm)); +#if defined(_MSC_VER) +/* function is defined in "compat.h" */ #else - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#endif -#endif -} -static void long2double( void *out, void *in) +double SubtractTime(uint64_t endTime, uint64_t startTime) { -#if defined(_MSC_VER) && defined(_M_X64) - cl_long l = ((cl_long*) in)[0]; - double result; + uint64_t diff = endTime - startTime; + static double conversion = 0.0; - _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l)); - ((double*) out)[0] = (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 + if (0.0 == conversion) + { +#if defined(__APPLE__) + mach_timebase_info_data_t info = { 0, 0 }; + kern_return_t err = mach_timebase_info(&info); + if (0 == err) + conversion = 1e-9 * (double)info.numer / (double)info.denom; #else - cl_long l = ((cl_long*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 + // This function consumes output from GetTime() above, and converts the + // time to secionds. +#warning need accurate ticks to seconds conversion factor here. Times are invalid. #endif -} -static void long2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_long*) in)[0]; } - -#define CLAMP( _lo, _x, _hi ) ( (_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? 
(_hi) : (_x))) - -// Done by hand -static void uchar2char_sat( void *out, void *in){ cl_uchar c = ((cl_uchar*) in)[0]; ((cl_char*) out)[0] = c > 0x7f ? 0x7f : c; } -static void uchar2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uchar*) in)[0]); } // my_fabs workaround for -static void uchar2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uchar*) in)[0]); } // my_fabs workaround for -static void uchar2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; } -static void char2uchar_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uchar*) out)[0] = c < 0 ? 0 : c; } -static void char2ushort_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ushort*) out)[0] = c < 0 ? 0 : c; } -static void char2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_char*) in)[0]; } -static void char2uint_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uint*) out)[0] = c < 0 ? 0 : c; } -static void char2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_char*) in)[0]; } -static void char2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_char*) in)[0]; } -static void char2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_char*) in)[0]; } -static void char2ulong_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ulong*) out)[0] = c < 0 ? 
0 : c; } -static void char2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; } -static void ushort2uchar_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_uchar*) out)[0] = u > 0xff ? 0xFF : u; } -static void ushort2char_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_char*) out)[0] = u > 0x7f ? 0x7F : u; } -static void ushort2short_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_short*) out)[0] = u > 0x7fff ? 0x7fFF : u; } -static void ushort2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf((cl_float)((cl_ushort*) in)[0]); } // my_fabs workaround for -static void ushort2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_ushort*) in)[0]); } // my_fabs workaround for -static void ushort2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; } -static void short2uchar_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, s, CL_UCHAR_MAX ); } -static void short2char_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, s, CL_CHAR_MAX ); } -static void short2ushort_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ushort*) out)[0] = s < 0 ? 0 : s; } -static void short2uint_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uint*) out)[0] = s < 0 ? 
0 : s; } -static void short2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; } -static void short2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_short*) in)[0]; } -static void short2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_short*) in)[0]; } -static void short2ulong_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ulong*) out)[0] = s < 0 ? 0 : s; } -static void short2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; } -static void uint2uchar_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX); } -static void uint2char_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); } -static void uint2ushort_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX); } -static void uint2short_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX); } -static void uint2int_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_int*) out)[0] = CLAMP( 0, u, CL_INT_MAX); } -static void uint2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uint*) in)[0] ); } // my_fabs workaround for -static void uint2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uint*) in)[0]); } // my_fabs workaround for -static void uint2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; } -static void int2uchar_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, i, CL_UCHAR_MAX); } -static void int2char_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, i, CL_CHAR_MAX); } -static void int2ushort_sat( void *out, void 
*in){ cl_int i = ((cl_int*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, i, CL_USHRT_MAX); } -static void int2short_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, i, CL_SHRT_MAX); } -static void int2uint_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uint*) out)[0] = CLAMP( 0, i, CL_INT_MAX); } -static void int2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_int*) in)[0]; } -static void int2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_int*) in)[0]; } -static void int2ulong_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ulong*) out)[0] = i < 0 ? 0 : i; } -static void int2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; } -static void float2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_UCHAR_MAX ); } -static void float2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_CHAR_MAX); } -static void float2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_USHRT_MAX ); } -static void float2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_SHRT_MAX ); } -static void float2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrintf_clamped(((cl_float*) in)[0]), CL_UINT_MAX ); } -static void float2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_INT_MAX ); } -static void float2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; } -static void float2ulong_sat( void *out, void *in) -{ -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) - // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int. 
- // However, fistp stores it as a signed int, and some of the test values won't - // fit into a signed int. (These test values are >= 2^63.) The result on VS2005 - // is that these end up silently (at least by default settings) clamped to - // the max lowest ulong. - cl_float x = my_rintf(((cl_float *)in)[0]); - if (x >= 18446744073709551616.0f) { // 2^64 - ((cl_ulong*) out)[0] = 0xFFFFFFFFFFFFFFFFULL; - } else if (x < 0) { - ((cl_ulong*) out)[0] = 0; - } else if (x >= 9223372036854775808.0f) { // 2^63 - x -= 9223372036854775808.0f; - ((cl_ulong*) out)[0] = x; - ((cl_ulong*) out)[0] += 9223372036854775808ULL; - } else { - ((cl_ulong*) out)[0] = x; } -#else - float f = my_rintf(((float*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; -#endif -} -// The final cast used to be (cl_ulong) f, but on Linux (RHEL5 at least) -// if f = -1.0f, then (cl_ulong) f = 0xffffffff, which clearly isn't right. -// Switching it to (cl_long) f seems to fix that. -static void float2long_sat( void *out, void *in){ float f = my_rintf(((float*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 
0x8000000000000000LL : (cl_long) f; } -static void double2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_UCHAR_MAX ); } -static void double2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrint_clamped(((cl_double*) in)[0]), CL_CHAR_MAX); } -static void double2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_USHRT_MAX ); } -static void double2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_SHRT_MAX ); } -static void double2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrint_clamped(((cl_double*) in)[0]), CL_UINT_MAX ); } -static void double2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_INT_MAX ); } -static void double2float_sat( void *out, void *in){ ((cl_float*) out)[0] = (cl_float) ((double*) in)[0]; } -static void double2ulong_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; } -static void double2long_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 
0x8000000000000000LL : (cl_long) f; } -static void ulong2uchar_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); } -static void ulong2char_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); } -static void ulong2ushort_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); } -static void ulong2short_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX ); } -static void ulong2uint_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); } -static void ulong2int_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_int*) out)[0] = (cl_int) CLAMP( 0, u, CL_INT_MAX ); } -static void ulong2float_sat( void *out, void *in){ ((float*) out)[0] = my_fabsf((float) ((cl_ulong*) in)[0]); } // my_fabs workaround for -static void ulong2double_sat( void *out, void *in){ ((double*) out)[0] = my_fabs( ((cl_ulong*) in)[0]); } // my_fabs workaround for -static void ulong2long_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_long*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); } -static void long2uchar_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); } -static void long2char_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, u, CL_CHAR_MAX ); } -static void long2ushort_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); } -static void long2short_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, u, CL_SHRT_MAX ); } -static void long2uint_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX 
); } -static void long2int_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_int*) out)[0] = (int) CLAMP( CL_INT_MIN, u, CL_INT_MAX ); } -static void long2float_sat( void *out, void *in){ ((float*) out)[0] = (float) ((cl_long*) in)[0]; } -static void long2double_sat( void *out, void *in){ ((double*) out)[0] = ((cl_long*) in)[0]; } -static void long2ulong_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ulong*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); } - -/* -#include -char *ground[] = { "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; - -const char *gTypeNames[ ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; + // strictly speaking we should also be subtracting out timer latency here + return conversion * (double)diff; +} +#endif +//////////////////////////////////////////////////////////////////////////////// -int main( void ) +static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) { - int i, j; - - for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ ) - for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ ) - { - vlog( "float clampf_%s%s( float );\n", gTypeNames[i], ground[j] ); - vlog( "double clampd_%s%s( double );\n", gTypeNames[i], ground[j] ); - } - - return 0; - + cl_uint i; + for (i = 0; i < count; ++i) + allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0); } -*/ - - -float clampf_uchar( float ); -double clampd_uchar( double ); -float clampf_uchar_rte( float ); -double clampd_uchar_rte( double ); -float clampf_uchar_rtp( float ); -double clampd_uchar_rtp( double ); -float clampf_uchar_rtn( float ); -double clampd_uchar_rtn( double ); -float clampf_uchar_rtz( float ); -double clampd_uchar_rtz( double ); -float clampf_char( float ); -double clampd_char( double ); -float clampf_char_rte( float ); -double clampd_char_rte( double ); -float clampf_char_rtp( float ); -double clampd_char_rtp( double ); -float clampf_char_rtn( float ); -double 
clampd_char_rtn( double ); -float clampf_char_rtz( float ); -double clampd_char_rtz( double ); -float clampf_ushort( float ); -double clampd_ushort( double ); -float clampf_ushort_rte( float ); -double clampd_ushort_rte( double ); -float clampf_ushort_rtp( float ); -double clampd_ushort_rtp( double ); -float clampf_ushort_rtn( float ); -double clampd_ushort_rtn( double ); -float clampf_ushort_rtz( float ); -double clampd_ushort_rtz( double ); -float clampf_short( float ); -double clampd_short( double ); -float clampf_short_rte( float ); -double clampd_short_rte( double ); -float clampf_short_rtp( float ); -double clampd_short_rtp( double ); -float clampf_short_rtn( float ); -double clampd_short_rtn( double ); -float clampf_short_rtz( float ); -double clampd_short_rtz( double ); -float clampf_uint( float ); -double clampd_uint( double ); -float clampf_uint_rte( float ); -double clampd_uint_rte( double ); -float clampf_uint_rtp( float ); -double clampd_uint_rtp( double ); -float clampf_uint_rtn( float ); -double clampd_uint_rtn( double ); -float clampf_uint_rtz( float ); -double clampd_uint_rtz( double ); -float clampf_int( float ); -double clampd_int( double ); -float clampf_int_rte( float ); -double clampd_int_rte( double ); -float clampf_int_rtp( float ); -double clampd_int_rtp( double ); -float clampf_int_rtn( float ); -double clampd_int_rtn( double ); -float clampf_int_rtz( float ); -double clampd_int_rtz( double ); -float clampf_float( float ); -double clampd_float( double ); -float clampf_float_rte( float ); -double clampd_float_rte( double ); -float clampf_float_rtp( float ); -double clampd_float_rtp( double ); -float clampf_float_rtn( float ); -double clampd_float_rtn( double ); -float clampf_float_rtz( float ); -double clampd_float_rtz( double ); -float clampf_double( float ); -double clampd_double( double ); -float clampf_double_rte( float ); -double clampd_double_rte( double ); -float clampf_double_rtp( float ); -double clampd_double_rtp( double ); -float 
clampf_double_rtn( float ); -double clampd_double_rtn( double ); -float clampf_double_rtz( float ); -double clampd_double_rtz( double ); -float clampf_ulong( float ); -double clampd_ulong( double ); -float clampf_ulong_rte( float ); -double clampd_ulong_rte( double ); -float clampf_ulong_rtp( float ); -double clampd_ulong_rtp( double ); -float clampf_ulong_rtn( float ); -double clampd_ulong_rtn( double ); -float clampf_ulong_rtz( float ); -double clampd_ulong_rtz( double ); -float clampf_long( float ); -double clampd_long( double ); -float clampf_long_rte( float ); -double clampd_long_rte( double ); -float clampf_long_rtp( float ); -double clampd_long_rtp( double ); -float clampf_long_rtn( float ); -double clampd_long_rtn( double ); -float clampf_long_rtz( float ); -double clampd_long_rtz( double ); - -/* -#include -char *ground[] = { "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; -const char *gTypeNames[ ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; +void MapResultValuesComplete(const std::unique_ptr &ptr); +void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, + void *data); -int main( void ) +// Note: May be called reentrantly +void MapResultValuesComplete(const std::unique_ptr &info) { - int i, j; - - for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ ) + cl_int status; + // CalcRefValsBase *info = (CalcRefValsBase *)data; + cl_event calcReferenceValues = info->parent->calcReferenceValues; + + // we know that the map is done, wait for the main thread to finish + // calculating the reference values + if ((status = + clSetEventCallback(calcReferenceValues, CL_COMPLETE, + CalcReferenceValuesComplete, (void *)&info))) { - vlog( "{\t" ); - for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ ) - vlog( "clampf_%s%s,\t", gTypeNames[i], ground[j] ); + vlog_error("ERROR: clSetEventCallback failed in " + "MapResultValuesComplete with status: %d\n", + status); + 
gFailCount++; // not thread safe -- being lazy here + } - vlog( "\t},\n" ); + // this thread no longer needs its reference to info->calcReferenceValues, + // so release it + if ((status = clReleaseEvent(calcReferenceValues))) + { + vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed " + "with status: %d\n", + status); + gFailCount++; // not thread safe -- being lazy here } - return 0; + // no need to flush since we didn't enqueue anything + // e was already released by WriteInputBufferComplete. It should be + // destroyed automatically soon after we exit. } -*/ -clampf gClampFloat[ kTypeCount ][kRoundingModeCount] = { - { clampf_uchar, clampf_uchar_rte, clampf_uchar_rtp, clampf_uchar_rtn, clampf_uchar_rtz, }, - { clampf_char, clampf_char_rte, clampf_char_rtp, clampf_char_rtn, clampf_char_rtz, }, - { clampf_ushort, clampf_ushort_rte, clampf_ushort_rtp, clampf_ushort_rtn, clampf_ushort_rtz, }, - { clampf_short, clampf_short_rte, clampf_short_rtp, clampf_short_rtn, clampf_short_rtz, }, - { clampf_uint, clampf_uint_rte, clampf_uint_rtp, clampf_uint_rtn, clampf_uint_rtz, }, - { clampf_int, clampf_int_rte, clampf_int_rtp, clampf_int_rtn, clampf_int_rtz, }, - { clampf_float, clampf_float_rte, clampf_float_rtp, clampf_float_rtn, clampf_float_rtz, }, - { clampf_double, clampf_double_rte, clampf_double_rtp, clampf_double_rtn, clampf_double_rtz, }, - { clampf_ulong, clampf_ulong_rte, clampf_ulong_rtp, clampf_ulong_rtn, clampf_ulong_rtz, }, - { clampf_long, clampf_long_rte, clampf_long_rtp, clampf_long_rtn, clampf_long_rtz, } -}; - -clampd gClampDouble[ kTypeCount ][kRoundingModeCount] = { - { clampd_uchar, clampd_uchar_rte, clampd_uchar_rtp, clampd_uchar_rtn, clampd_uchar_rtz, }, - { clampd_char, clampd_char_rte, clampd_char_rtp, clampd_char_rtn, clampd_char_rtz, }, - { clampd_ushort, clampd_ushort_rte, clampd_ushort_rtp, clampd_ushort_rtn, clampd_ushort_rtz, }, - { clampd_short, clampd_short_rte, clampd_short_rtp, clampd_short_rtn, clampd_short_rtz, }, - { 
clampd_uint, clampd_uint_rte, clampd_uint_rtp, clampd_uint_rtn, clampd_uint_rtz, }, - { clampd_int, clampd_int_rte, clampd_int_rtp, clampd_int_rtn, clampd_int_rtz, }, - { clampd_float, clampd_float_rte, clampd_float_rtp, clampd_float_rtn, clampd_float_rtz, }, - { clampd_double, clampd_double_rte, clampd_double_rtp, clampd_double_rtn, clampd_double_rtz, }, - { clampd_ulong, clampd_ulong_rte, clampd_ulong_rtp, clampd_ulong_rtn, clampd_ulong_rtz, }, - { clampd_long, clampd_long_rte, clampd_long_rtp, clampd_long_rtn, clampd_long_rtz, } -}; -#if defined (_WIN32) -#define __attribute__(X) -#endif -static inline float fclamp( float lo, float v, float hi ) __attribute__ ((always_inline)); -static inline double dclamp( double lo, double v, double hi ) __attribute__ ((always_inline)); - -static inline float fclamp( float lo, float v, float hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; } -static inline double dclamp( double lo, double v, double hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; } - -// Clamp unsaturated inputs into range so we don't get test errors: -float clampf_uchar( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); } -double clampd_uchar( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); } -float clampf_uchar_rte( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); } -double clampd_uchar_rte( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); } -float clampf_uchar_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 255.0f ); } -double clampd_uchar_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 255.0 ); } -float clampf_uchar_rtn( float f ) { return fclamp( -0.0f, f, 256.0f - 128.0f * FLT_EPSILON); } -double clampd_uchar_rtn( double f ) { return dclamp( -0.0, f, 256.0 - 128.0 * DBL_EPSILON); } -float clampf_uchar_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 256.0f - 128.0f * FLT_EPSILON); } -double clampd_uchar_rtz( double f ) { return dclamp( 
-1.0 + DBL_EPSILON/2.0, f, 256.0 - 128.0f * DBL_EPSILON); } - -float clampf_char( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); } -double clampd_char( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); } -float clampf_char_rte( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); } -double clampd_char_rte( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); } -float clampf_char_rtp( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 127.f ); } -double clampd_char_rtp( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 127. ); } -float clampf_char_rtn( float f ) { return fclamp( -128.0f, f, 128.f - 64.0f*FLT_EPSILON ); } -double clampd_char_rtn( double f ) { return dclamp( -128.0, f, 128. - 64.0*DBL_EPSILON ); } -float clampf_char_rtz( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 128.f - 64.0f*FLT_EPSILON ); } -double clampd_char_rtz( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 128. 
- 64.0*DBL_EPSILON ); } - -float clampf_ushort( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); } -double clampd_ushort( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); } -float clampf_ushort_rte( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); } -double clampd_ushort_rte( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); } -float clampf_ushort_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65535.0f ); } -double clampd_ushort_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65535.0 ); } -float clampf_ushort_rtn( float f ) { return fclamp( -0.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); } -double clampd_ushort_rtn( double f ) { return dclamp( -0.0, f, 65536.0 - 32768.0 * DBL_EPSILON); } -float clampf_ushort_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); } -double clampd_ushort_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65536.0 - 32768.0f * DBL_EPSILON); } - -float clampf_short( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); } -double clampd_short( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); } -float clampf_short_rte( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); } -double clampd_short_rte( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); } -float clampf_short_rtp( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32767.f ); } -double clampd_short_rtp( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32767. ); } -float clampf_short_rtn( float f ) { return fclamp( -32768.0f, f, 32768.f - 16384.0f*FLT_EPSILON ); } -double clampd_short_rtn( double f ) { return dclamp( -32768.0, f, 32768. 
- 16384.0*DBL_EPSILON ); } -float clampf_short_rtz( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32768.f - 16384.0f*FLT_EPSILON ); } -double clampd_short_rtz( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32768. - 16384.0*DBL_EPSILON ); } - -float clampf_uint( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); } -float clampf_uint_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint_rte( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); } -float clampf_uint_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, CL_UINT_MAX ); } -float clampf_uint_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); } -double clampd_uint_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) ); } -float clampf_uint_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); } -double clampd_uint_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)); } - -float clampf_int( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } -float clampf_int_rte( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rte( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 
30) * DBL_EPSILON ); } -float clampf_int_rtp( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtp( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX ); } -float clampf_int_rtn( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtn( double f ) { return dclamp( INT_MIN, f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } -float clampf_int_rtz( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtz( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } - -float clampf_float( float f ){ return f; } -double clampd_float( double f ){ return f; } -float clampf_float_rte( float f ){ return f; } -double clampd_float_rte( double f ){ return f; } -float clampf_float_rtp( float f ){ return f; } -double clampd_float_rtp( double f ){ return f; } -float clampf_float_rtn( float f ){ return f; } -double clampd_float_rtn( double f ){ return f; } -float clampf_float_rtz( float f ){ return f; } -double clampd_float_rtz( double f ){ return f; } - -float clampf_double( float f ){ return f; } -double clampd_double( double f ){ return f; } -float clampf_double_rte( float f ){ return f; } -double clampd_double_rte( double f ){ return f; } -float clampf_double_rtp( float f ){ return f; } -double clampd_double_rtp( double f ){ return f; } -float clampf_double_rtn( float f ){ return f; } -double clampd_double_rtn( double f ){ return f; } -float clampf_double_rtz( float f ){ return f; } -double clampd_double_rtz( double f ){ return f; } - -float clampf_ulong( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong( double f ) { return dclamp( -0.5, f, 
MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rte( double f ) { return dclamp( -0.5, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } - -float clampf_long( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rte( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rte( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtp( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtp( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, 
MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtn( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtn( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtz( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtz( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } - -#pragma mark - - -int alwaysPass( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int alwaysFail( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_uchar( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_char( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_ushort( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_short( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_uint( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_int( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_ulong( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_long( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_float( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_double( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); - -void init_uchar( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_char( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, 
MTdata d ); -void init_ushort( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_short( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_uint( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_int( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_float( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_double( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_ulong( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_long( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); - -InitDataFunc gInitFunctions[ kTypeCount ] = { - init_uchar, init_char, - init_ushort, init_short, - init_uint, init_int, - init_float, init_double, - init_ulong, init_long - }; - - -CheckResults gCheckResults[ kTypeCount ] = { - check_uchar, check_char, check_ushort, check_short, check_uint, - check_int, check_float, check_double, check_ulong, check_long - }; -#if !defined (__APPLE__) -#define UNUSED -#else -#define UNUSED __attribute__((unused)) -#endif +void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, + void *data) +{ + std::unique_ptr &info = + *(std::unique_ptr *)data; + + cl_uint vectorSize = info->vectorSize; + cl_uint count = info->parent->count; + Type outType = + info->parent->outType; // the data type of the conversion result + Type inType = info->parent->inType; // the data type of the conversion input + size_t j; + cl_int error; + cl_event doneBarrier = info->parent->doneBarrier; + + // report spurious error condition + if (CL_SUCCESS != status) + { + vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! 
(%d)\n", + status); + gFailCount++; // lazy about thread safety here + return; + } -int alwaysPass( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize){ return 0; } -int alwaysFail( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize ){ return -1; } + // Now we know that both results have been mapped back from the device, and + // the main thread is done calculating the reference results. It is now time + // to check the results. -int check_uchar( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_uchar *t = (const cl_uchar*)test; - const cl_uchar *c = (const cl_uchar*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + // verify results + void *mapped = info->p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uchar)0)) + // Patch up NaNs conversions to integer to zero -- these can be converted to + // any integer + if (outType != kfloat && outType != kdouble) + { + if (inType == kfloat) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + float *inp = (float *)gIn; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)mapped + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } } - - return 0; -} - -int check_char( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_char *t = (const cl_char*)test; - const cl_char *c = (const cl_char*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_char)0)) + if (inType == kdouble) + { + double *inp = (double *)gIn; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)mapped + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } + } + } + else if (inType == kfloat || 
inType == kdouble) + { // outtype and intype is float or double. NaN conversions for float <-> + // double can be any NaN + if (inType == kfloat && outType == kdouble) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + float *inp = (float *)gIn; + double *outp = (double *)mapped; + for (j = 0; j < count; j++) + { + if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; + } } + if (inType == kdouble && outType == kfloat) + { + double *inp = (double *)gIn; + float *outp = (float *)mapped; + for (j = 0; j < count; j++) + { + if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; + } + } + } - return 0; -} + if (memcmp(mapped, gRef, count * gTypeSizes[outType])) + info->result = + info->check_result(mapped, count, vectorSizes[vectorSize]); + else + info->result = 0; -int check_ushort( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ushort *t = (const cl_ushort*)test; - const cl_ushort *c = (const cl_ushort*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + // Fill the output buffer with junk and release it + { + cl_uint pattern = 0xffffdead; + memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]); + if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize], + mapped, 0, NULL, NULL))) + { + vlog_error("ERROR: clEnqueueUnmapMemObject failed in " + "CalcReferenceValuesComplete (%d)\n", + error); + gFailCount++; + } + } - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ushort)0)) + if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1)) + { + if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + vlog_error("ERROR: clSetUserEventStatus failed in " + "CalcReferenceValuesComplete (err: %d). 
We're probably " + "going to deadlock.\n", + status); + gFailCount++; + return; } - return 0; + if ((status = clReleaseEvent(doneBarrier))) + { + vlog_error("ERROR: clReleaseEvent failed in " + "CalcReferenceValuesComplete (err: %d).\n", + status); + gFailCount++; + return; + } + } + // e was already released by WriteInputBufferComplete. It should be + // destroyed automatically soon after all the calls to + // CalcReferenceValuesComplete exit. } -int check_short( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_short *t = (const cl_short*)test; - const cl_short *c = (const cl_short*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; +// - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_short)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } +namespace conv_test { - return 0; -} +//////////////////////////////////////////////////////////////////////////////// -int check_uint( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) { - const cl_uint *t = (const cl_uint*)test; - const cl_uint *c = (const cl_uint*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + DataInitBase *info = (DataInitBase *)p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uint)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } + info->init(job_id, thread_id); - return 0; + return CL_SUCCESS; } -int check_int( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +//////////////////////////////////////////////////////////////////////////////// + +cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) { - const cl_int *t = (const cl_int*)test; - const cl_int 
*c = (const cl_int*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + DataInitBase *info = (DataInitBase *)p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_int)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } + cl_uint count = info->size; + Type inType = info->inType; + Type outType = info->outType; + RoundingMode round = info->round; + size_t j; - return 0; -} + Force64BitFPUPrecision(); -int check_ulong( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ulong *t = (const cl_ulong*)test; - const cl_ulong *c = (const cl_ulong*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType]; + void *a = (cl_uchar *)gAllowZ + job_id * count; + void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType]; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ulong)0)) + + if (outType != inType) + { + // create the reference while we wait +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) + /* ARM VFP doesn't have hardware instruction for converting from 64-bit + * integer to float types, hence GCC ARM uses the floating-point + * emulation code despite which -mfloat-abi setting it is. But the + * emulation code in libgcc.a has only one rounding mode (round to + * nearest even in this case) and ignores the user rounding mode setting + * in hardware. As a result setting rounding modes in hardware won't + * give correct rounding results for type covert from 64-bit integer to + * float using GCC for ARM compiler so for testing different rounding + * modes, we need to use alternative reference function. ARM64 does have + * an instruction, however we cannot guarantee the compiler will use it. 
+ * On all ARM architechures use emulation to calculate reference.*/ + switch (round) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] ); - return i + 1; + /* conversions to floating-point type use the current rounding mode. + * The only default floating-point rounding mode supported is round + * to nearest even i.e the current rounding mode will be _rte for + * floating-point types. */ + case kDefaultRoundingMode: qcom_rm = qcomRTE; break; + case kRoundToNearestEven: qcom_rm = qcomRTE; break; + case kRoundUp: qcom_rm = qcomRTP; break; + case kRoundDown: qcom_rm = qcomRTN; break; + case kRoundTowardZero: qcom_rm = qcomRTZ; break; + default: + vlog_error("ERROR: undefined rounding mode %d\n", round); + break; } + qcom_sat = info->sat; +#endif - return 0; -} + RoundingMode oldRound = set_round(round, outType); -int check_long( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_long *t = (const cl_long*)test; - const cl_long *c = (const cl_long*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + if (info->sat) + info->conv_array_sat(d, s, count); + else + info->conv_array(d, s, count); - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_long)0)) + set_round(oldRound, outType); + + // Decide if we allow a zero result in addition to the correctly rounded + // one + memset(a, 0, count); + if (gForceFTZ) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] ); - return i + 1; + if (inType == kfloat || outType == kfloat) + setAllowZ((uint8_t *)a, (uint32_t *)s, count); } + } + else + { + // Copy the input to the reference + memcpy(d, s, info->size * gTypeSizes[inType]); + } - return 0; -} - -int check_float( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_uint *t = (const cl_uint*)test; - const cl_uint *c = (const 
cl_uint*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if (t[i] != c[i] && - // Allow nan's to be binary different - !((t[i] & 0x7fffffffU) > 0x7f800000U && - (c[i] & 0x7fffffffU) > 0x7f800000U) && - !(a[i] != (cl_uchar)0 && - t[i] == (c[i] & 0x80000000U))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", - vectorSize, i, ((float*)correct)[i], ((float*)test)[i] ); - return i + 1; + // Patch up NaNs conversions to integer to zero -- these can be converted to + // any integer + if (info->outType != kfloat && info->outType != kdouble) + { + if (inType == kfloat) + { + float *inp = (float *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)d + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } + } + if (inType == kdouble) + { + double *inp = (double *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)d + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } + } + } + else if (inType == kfloat || inType == kdouble) + { // outtype and intype is float or double. 
NaN conversions for float <-> + // double can be any NaN + if (inType == kfloat && outType == kdouble) + { + float *inp = (float *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) ((double *)d)[j] = NAN; + } } + if (inType == kdouble && outType == kfloat) + { + double *inp = (double *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) ((float *)d)[j] = NAN; + } + } + } - return 0; + return CL_SUCCESS; } -int check_double( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ulong *t = (const cl_ulong*)test; - const cl_ulong *c = (const cl_ulong*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if (t[i] != c[i] && - // Allow nan's to be binary different - !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL && - (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL) && - !(a[i] != (cl_uchar)0 && - t[i] == (c[i] & 0x8000000000000000ULL))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", - vectorSize, i, ((double*)correct)[i], ((double*)test)[i] ); - return i + 1; - } +//////////////////////////////////////////////////////////////////////////////// +uint64_t GetTime(void) +{ +#if defined(__APPLE__) + return mach_absolute_time(); +#elif defined(_MSC_VER) + return ReadTime(); +#else + // mach_absolute_time is a high precision timer with precision < 1 + // microsecond. +#warning need accurate clock here. Times are invalid. 
return 0; +#endif } +//////////////////////////////////////////////////////////////////////////////// -void init_uchar( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) +// Note: not called reentrantly +void WriteInputBufferComplete(void *data) { - cl_uchar *o = (cl_uchar *)out; - int i; + cl_int status; + WriteInputBufferInfo *info = (WriteInputBufferInfo *)data; + cl_uint count = info->count; + int vectorSize; - for( i = 0; i < count; i++ ) - o[i] = start++; -} + info->barrierCount = gMaxVectorSize - gMinVectorSize; -void init_char( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) -{ - char *o = (char *)out; - int i; + // now that we know that the write buffer is complete, enqueue callbacks to + // wait for the main thread to finish calculating the reference results. + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + size_t workItemCount = + (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]); - for( i = 0; i < count; i++ ) - o[i] = start++; -} + if ((status = conv_test::RunKernel(info->calcInfo[vectorSize]->kernel, + gInBuffer, gOutBuffers[vectorSize], + workItemCount))) + { + gFailCount++; + return; + } -void init_ushort( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) -{ - cl_ushort *o = (cl_ushort *)out; - int i; + info->calcInfo[vectorSize]->p = clEnqueueMapBuffer( + gQueue, gOutBuffers[vectorSize], CL_TRUE, + CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0, + NULL, NULL, &status); + { + if (status) + { + vlog_error("ERROR: WriteInputBufferComplete calback failed " + "with status: %d\n", + status); + gFailCount++; + return; + } + } + } - for( i = 0; i < count; i++ ) - o[i] = start++; -} + for (vectorSize = gMinVectorSize; vectorSize < 
gMaxVectorSize; vectorSize++) + { + MapResultValuesComplete(info->calcInfo[vectorSize]); + } -void init_short( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, UNUSED Type destType, uint64_t start, int count, MTdata UNUSED d ) -{ - short *o = (short *)out; - int i; + // Make sure the work starts moving -- otherwise we may deadlock + if ((status = clFlush(gQueue))) + { + vlog_error( + "ERROR: WriteInputBufferComplete calback failed with status: %d\n", + status); + gFailCount++; + return; + } - for( i = 0; i < count; i++ ) - o[i] = start++; + // e was already released by the main thread. It should be destroyed + // automatically soon after we exit. } -void init_uint( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) +//////////////////////////////////////////////////////////////////////////////// + +cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, + RoundingMode round, int vectorSize, cl_kernel *outKernel) { - static const unsigned int specialValuesUInt[] = { - INT_MIN, INT_MIN + 1, INT_MIN + 2, - -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3, - -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3, - -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3, - -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3, - -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3, - -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3, - -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3, - -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3, - -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3, - -4, -3, -2, -1, 0, 1, 2, 3, 4, - (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3, - (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, 
(1<<8)+2, (1<<8)+3, - (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3, - (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3, - (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3, - (1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3, - (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3, - (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3, - (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3, - INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above - UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX - }; - - cl_uint *o = (cl_uint *)out; - int i; + cl_program program; + char testName[256]; + int error = 0; - for( i = 0; i < count; i++) { - if( gIsEmbedded ) - o[i] = (cl_uint) genrand_int32(d); - else - o[i] = (cl_uint)i + start; - } + std::ostringstream source; + if (outType == kdouble || inType == kdouble) + source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - if( 0 == start ) + // Create the program. This is a bit complicated because we are trying to + // avoid byte and short stores. + if (0 == vectorSize) { - size_t tableSize = sizeof( specialValuesUInt ); - if( sizeof( cl_uint) * count < tableSize ) - tableSize = sizeof( cl_uint) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesUInt, tableSize ); + // Create the type names. 
+ char inName[32]; + char outName[32]; + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + sprintf(testName, "test_implicit_%s_%s", outName, inName); + + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = src[i];\n"; + source << "}\n"; + + vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], + gTypeNames[outType]); + fflush(stdout); } -} + else + { + int vectorSizetmp = vectorSizes[vectorSize]; -void init_int( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) -{ - static const unsigned int specialValuesInt[] = { - INT_MIN, INT_MIN + 1, INT_MIN + 2, - -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3, - -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3, - -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3, - -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3, - -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3, - -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3, - -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3, - -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3, - -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3, - -4, -3, -2, -1, 0, 1, 2, 3, 4, - (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3, - (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, (1<<8)+2, (1<<8)+3, - (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3, - (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3, - (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3, - 
(1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3, - (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3, - (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3, - (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3, - INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above - UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX - }; - - int *o = (int *)out; - int i; + // Create the type names. + char convertString[128]; + char inName[32]; + char outName[32]; + switch (vectorSizetmp) + { + case 1: + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + snprintf(convertString, sizeof(convertString), "convert_%s%s%s", + outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s", convertString, inName); + vlog("Building %s( %s ) test\n", convertString, inName); + break; + case 3: + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + snprintf(convertString, sizeof(convertString), + "convert_%s3%s%s", outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s3", convertString, inName); + vlog("Building %s( %s3 ) test\n", convertString, inName); + break; + default: + snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType], + vectorSizetmp); + snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType], + vectorSizetmp); + snprintf(convertString, sizeof(convertString), "convert_%s%s%s", + outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s", convertString, inName); + vlog("Building %s( %s ) test\n", convertString, inName); + break; + } + fflush(stdout); - for( i = 0; i < count; i++ ) { - if( gIsEmbedded ) { - o[i] = (int) genrand_int32(d); - } - else { - o[i] = (int) i + start; - } + if (vectorSizetmp == 
3) + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " if( i + 1 < get_global_size(0))\n"; + source << " vstore3( " << convertString + << "( vload3( i, src)), i, dest );\n"; + source << " else\n"; + source << " {\n"; + source << " " << inName << "3 in;\n"; + source << " " << outName << "3 out;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " in.y = src[3*i+1];\n"; + source << " in.x = src[3*i];\n"; + source << " out = " << convertString << "( in ); \n"; + source << " dest[3*i] = out.x;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " dest[3*i+1] = out.y;\n"; + source << " }\n"; + source << "}\n"; + } + else + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = " << convertString << "( src[i] );\n"; + source << "}\n"; + } } + *outKernel = NULL; - if( 0 == start ) + const char *flags = NULL; + if (gForceFTZ) flags = "-cl-denorms-are-zero"; + + // build it + std::string sourceString = source.str(); + const char *programSource = sourceString.c_str(); + error = create_single_kernel_helper(gContext, &program, outKernel, 1, + &programSource, testName, flags); + if (error) { - size_t tableSize = sizeof( specialValuesInt ); - if( sizeof( int) * count < tableSize ) - tableSize = sizeof( int) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesInt, tableSize ); + vlog_error("Failed to build kernel/program (err = %d).\n", error); + return NULL; } + + return program; } -void init_float( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d ) +// + +int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount) { - static const float specialValuesFloat[] = { - -NAN, -INFINITY, -FLT_MAX, 
MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), - MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, - -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), - MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), - MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), - MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, - +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), 
MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), - MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, - +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), - MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), - MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), - MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f - }; - - cl_uint *o = (cl_uint *)out; - int i; + // The global dimensions are just the blockCount to execute since we haven't + // set up multiple queues for multiple devices. 
+ int error; - for( i = 0; i < count; i++ ) { - if( gIsEmbedded ) - o[i] = (cl_uint) genrand_int32(d); - else - o[i] = (cl_uint) i + start; - } + error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf); + error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf); - if( 0 == start ) + if (error) { - size_t tableSize = sizeof( specialValuesFloat ); - if( sizeof( float) * count < tableSize ) - tableSize = sizeof( float) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesFloat, tableSize ); + vlog_error("FAILED -- could not set kernel args (%d)\n", error); + return error; } - if( kUnsaturated == sat ) + if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount, + NULL, 0, NULL, NULL))) { - clampf func = gClampFloat[ destType ][round]; - float *f = (float *)out; - - for( i = 0; i < count; i++ ) - f[i] = func( f[i] ); + vlog_error("FAILED -- could not execute kernel (%d)\n", error); + return error; } -} - -// used to convert a bucket of bits into a search pattern through double -static inline double DoubleFromUInt32( uint32_t bits ); -static inline double DoubleFromUInt32( uint32_t bits ) -{ - union{ uint64_t u; double d;} u; - - // split 0x89abcdef to 0x89abc00000000def - u.u = bits & 0xfffU; - u.u |= (uint64_t) (bits & ~0xfffU) << 32; - // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s - u.u -= (bits & 0x800U) << 1; - - // return result - return u.d; + return 0; } -// A table of more difficult cases to get right -static const double specialValuesDouble[] = { - -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8), - MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12), MAKE_HEX_DOUBLE(-0x1.80000000000001p63, 
-0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), - MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), - MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, - -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, 
-1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, - - MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59), MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), - MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, - +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, 
MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, - - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), 
MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), - MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0), MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52), MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0), MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), - MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53), MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1), MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), - 
MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2), MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54), MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2), MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), - MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10), MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10), -}; - -void init_double( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata UNUSED d ) +int GetTestCase(const char *name, Type *outType, Type *inType, + SaturationMode *sat, RoundingMode *round) { - double *o = (double*)out; int i; - for( i = 0; i < count; i++ ) - { - uint64_t z = i + start; - o[i] = DoubleFromUInt32( (uint32_t) z ^ (uint32_t) (z >> 32)); - } + // Find the return type + for (i = 0; i < kTypeCount; i++) + if (name == strstr(name, gTypeNames[i])) + { + *outType = (Type)i; + name += strlen(gTypeNames[i]); - if( 0 == start ) - { - size_t tableSize = sizeof( specialValuesDouble ); - if( sizeof( cl_double) * count < tableSize ) - tableSize = sizeof( cl_double) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesDouble, tableSize ); - } + break; + } - if( 0 == sat ) - { - clampd func = gClampDouble[ destType ][round]; + if (i == kTypeCount) return -1; - for( i = 0; i < count; i++ ) - o[i] = func( o[i] ); - } -} + // Check to see if _sat appears next + *sat = (SaturationMode)0; + for (i = 1; i < kSaturationModeCount; i++) + if (name == strstr(name, gSaturationNames[i])) + { + *sat = (SaturationMode)i; + name += strlen(gSaturationNames[i]); + break; + } -cl_ulong random64( MTdata d ) -{ - return (cl_ulong) 
genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); -} + *round = (RoundingMode)0; + for (i = 1; i < kRoundingModeCount; i++) + if (name == strstr(name, gRoundingModeNames[i])) + { + *round = (RoundingMode)i; + name += strlen(gRoundingModeNames[i]); + break; + } -void init_ulong( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) -{ - cl_ulong *o = (cl_ulong *)out; - cl_ulong i, j, k; + if (*name != '_') return -2; + name++; - i = 0; - if( start == 0 ) - { - //Try various powers of two - for( j = 0; j < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[j] = (cl_ulong) 1 << j; - i = j; - - // try the complement of those - for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[i++] = ~((cl_ulong) 1 << j); - - //Try various negative powers of two - for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[i++] = (cl_ulong) 0xFFFFFFFFFFFFFFFEULL << j; - - //try various powers of two plus 1, shifted by various amounts - for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ ) - for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ ) - o[i++] = (((cl_ulong) 1 << j) + 1) << k; - - //try various powers of two minus 1 - for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ ) - for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ ) - o[i++] = (((cl_ulong) 1 << j) - 1) << k; - - // Other patterns - cl_ulong pattern[] = { 0x3333333333333333ULL, 0x5555555555555555ULL, 0x9999999999999999ULL, 0x6666666666666666ULL, 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL }; - cl_ulong mask[] = { 0xffffffffffffffffULL, 0xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL, 0xffffffff00000000ULL }; - for( j = 0; i < (cl_ulong) count && j < sizeof(pattern) / sizeof( pattern[0]); j++ ) - for( k = 0; i + 2 <= (cl_ulong) count && k < sizeof(mask) / sizeof( mask[0]); k++ ) - { - o[i++] = pattern[j] & mask[k]; - o[i++] = pattern[j] & ~mask[k]; 
- } - } + for (i = 0; i < kTypeCount; i++) + if (name == strstr(name, gTypeNames[i])) + { + *inType = (Type)i; + name += strlen(gTypeNames[i]); - for( ; i < (cl_ulong) count; i++ ) - o[i] = random64(d); -} + break; + } -void init_long( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d ) -{ - init_ulong( out, sat, round, destType, start, count, d ); -} + if (i == kTypeCount) return -3; -// ====== - -void uchar2uchar_many( void *out, void *in, size_t n); -void uchar2uchar_sat_many( void *out, void *in, size_t n); -void char2uchar_many( void *out, void *in, size_t n); -void char2uchar_sat_many( void *out, void *in, size_t n); -void ushort2uchar_many( void *out, void *in, size_t n); -void ushort2uchar_sat_many( void *out, void *in, size_t n); -void short2uchar_many( void *out, void *in, size_t n); -void short2uchar_sat_many( void *out, void *in, size_t n); -void uint2uchar_many( void *out, void *in, size_t n); -void uint2uchar_sat_many( void *out, void *in, size_t n); -void int2uchar_many( void *out, void *in, size_t n); -void int2uchar_sat_many( void *out, void *in, size_t n); -void float2uchar_many( void *out, void *in, size_t n); -void float2uchar_sat_many( void *out, void *in, size_t n); -void double2uchar_many( void *out, void *in, size_t n); -void double2uchar_sat_many( void *out, void *in, size_t n); -void ulong2uchar_many( void *out, void *in, size_t n); -void ulong2uchar_sat_many( void *out, void *in, size_t n); -void long2uchar_many( void *out, void *in, size_t n); -void long2uchar_sat_many( void *out, void *in, size_t n); -void uchar2char_many( void *out, void *in, size_t n); -void uchar2char_sat_many( void *out, void *in, size_t n); -void char2char_many( void *out, void *in, size_t n); -void char2char_sat_many( void *out, void *in, size_t n); -void ushort2char_many( void *out, void *in, size_t n); -void ushort2char_sat_many( void *out, void *in, size_t n); -void short2char_many( void *out, void *in, size_t 
n); -void short2char_sat_many( void *out, void *in, size_t n); -void uint2char_many( void *out, void *in, size_t n); -void uint2char_sat_many( void *out, void *in, size_t n); -void int2char_many( void *out, void *in, size_t n); -void int2char_sat_many( void *out, void *in, size_t n); -void float2char_many( void *out, void *in, size_t n); -void float2char_sat_many( void *out, void *in, size_t n); -void double2char_many( void *out, void *in, size_t n); -void double2char_sat_many( void *out, void *in, size_t n); -void ulong2char_many( void *out, void *in, size_t n); -void ulong2char_sat_many( void *out, void *in, size_t n); -void long2char_many( void *out, void *in, size_t n); -void long2char_sat_many( void *out, void *in, size_t n); -void uchar2ushort_many( void *out, void *in, size_t n); -void uchar2ushort_sat_many( void *out, void *in, size_t n); -void char2ushort_many( void *out, void *in, size_t n); -void char2ushort_sat_many( void *out, void *in, size_t n); -void ushort2ushort_many( void *out, void *in, size_t n); -void ushort2ushort_sat_many( void *out, void *in, size_t n); -void short2ushort_many( void *out, void *in, size_t n); -void short2ushort_sat_many( void *out, void *in, size_t n); -void uint2ushort_many( void *out, void *in, size_t n); -void uint2ushort_sat_many( void *out, void *in, size_t n); -void int2ushort_many( void *out, void *in, size_t n); -void int2ushort_sat_many( void *out, void *in, size_t n); -void float2ushort_many( void *out, void *in, size_t n); -void float2ushort_sat_many( void *out, void *in, size_t n); -void double2ushort_many( void *out, void *in, size_t n); -void double2ushort_sat_many( void *out, void *in, size_t n); -void ulong2ushort_many( void *out, void *in, size_t n); -void ulong2ushort_sat_many( void *out, void *in, size_t n); -void long2ushort_many( void *out, void *in, size_t n); -void long2ushort_sat_many( void *out, void *in, size_t n); -void uchar2short_many( void *out, void *in, size_t n); -void uchar2short_sat_many( 
void *out, void *in, size_t n); -void char2short_many( void *out, void *in, size_t n); -void char2short_sat_many( void *out, void *in, size_t n); -void ushort2short_many( void *out, void *in, size_t n); -void ushort2short_sat_many( void *out, void *in, size_t n); -void short2short_many( void *out, void *in, size_t n); -void short2short_sat_many( void *out, void *in, size_t n); -void uint2short_many( void *out, void *in, size_t n); -void uint2short_sat_many( void *out, void *in, size_t n); -void int2short_many( void *out, void *in, size_t n); -void int2short_sat_many( void *out, void *in, size_t n); -void float2short_many( void *out, void *in, size_t n); -void float2short_sat_many( void *out, void *in, size_t n); -void double2short_many( void *out, void *in, size_t n); -void double2short_sat_many( void *out, void *in, size_t n); -void ulong2short_many( void *out, void *in, size_t n); -void ulong2short_sat_many( void *out, void *in, size_t n); -void long2short_many( void *out, void *in, size_t n); -void long2short_sat_many( void *out, void *in, size_t n); -void uchar2uint_many( void *out, void *in, size_t n); -void uchar2uint_sat_many( void *out, void *in, size_t n); -void char2uint_many( void *out, void *in, size_t n); -void char2uint_sat_many( void *out, void *in, size_t n); -void ushort2uint_many( void *out, void *in, size_t n); -void ushort2uint_sat_many( void *out, void *in, size_t n); -void short2uint_many( void *out, void *in, size_t n); -void short2uint_sat_many( void *out, void *in, size_t n); -void uint2uint_many( void *out, void *in, size_t n); -void uint2uint_sat_many( void *out, void *in, size_t n); -void int2uint_many( void *out, void *in, size_t n); -void int2uint_sat_many( void *out, void *in, size_t n); -void float2uint_many( void *out, void *in, size_t n); -void float2uint_sat_many( void *out, void *in, size_t n); -void double2uint_many( void *out, void *in, size_t n); -void double2uint_sat_many( void *out, void *in, size_t n); -void 
ulong2uint_many( void *out, void *in, size_t n); -void ulong2uint_sat_many( void *out, void *in, size_t n); -void long2uint_many( void *out, void *in, size_t n); -void long2uint_sat_many( void *out, void *in, size_t n); -void uchar2int_many( void *out, void *in, size_t n); -void uchar2int_sat_many( void *out, void *in, size_t n); -void char2int_many( void *out, void *in, size_t n); -void char2int_sat_many( void *out, void *in, size_t n); -void ushort2int_many( void *out, void *in, size_t n); -void ushort2int_sat_many( void *out, void *in, size_t n); -void short2int_many( void *out, void *in, size_t n); -void short2int_sat_many( void *out, void *in, size_t n); -void uint2int_many( void *out, void *in, size_t n); -void uint2int_sat_many( void *out, void *in, size_t n); -void int2int_many( void *out, void *in, size_t n); -void int2int_sat_many( void *out, void *in, size_t n); -void float2int_many( void *out, void *in, size_t n); -void float2int_sat_many( void *out, void *in, size_t n); -void double2int_many( void *out, void *in, size_t n); -void double2int_sat_many( void *out, void *in, size_t n); -void ulong2int_many( void *out, void *in, size_t n); -void ulong2int_sat_many( void *out, void *in, size_t n); -void long2int_many( void *out, void *in, size_t n); -void long2int_sat_many( void *out, void *in, size_t n); -void uchar2float_many( void *out, void *in, size_t n); -void uchar2float_sat_many( void *out, void *in, size_t n); -void char2float_many( void *out, void *in, size_t n); -void char2float_sat_many( void *out, void *in, size_t n); -void ushort2float_many( void *out, void *in, size_t n); -void ushort2float_sat_many( void *out, void *in, size_t n); -void short2float_many( void *out, void *in, size_t n); -void short2float_sat_many( void *out, void *in, size_t n); -void uint2float_many( void *out, void *in, size_t n); -void uint2float_sat_many( void *out, void *in, size_t n); -void int2float_many( void *out, void *in, size_t n); -void int2float_sat_many( void 
*out, void *in, size_t n); -void float2float_many( void *out, void *in, size_t n); -void float2float_sat_many( void *out, void *in, size_t n); -void double2float_many( void *out, void *in, size_t n); -void double2float_sat_many( void *out, void *in, size_t n); -void ulong2float_many( void *out, void *in, size_t n); -void ulong2float_sat_many( void *out, void *in, size_t n); -void long2float_many( void *out, void *in, size_t n); -void long2float_sat_many( void *out, void *in, size_t n); -void uchar2double_many( void *out, void *in, size_t n); -void uchar2double_sat_many( void *out, void *in, size_t n); -void char2double_many( void *out, void *in, size_t n); -void char2double_sat_many( void *out, void *in, size_t n); -void ushort2double_many( void *out, void *in, size_t n); -void ushort2double_sat_many( void *out, void *in, size_t n); -void short2double_many( void *out, void *in, size_t n); -void short2double_sat_many( void *out, void *in, size_t n); -void uint2double_many( void *out, void *in, size_t n); -void uint2double_sat_many( void *out, void *in, size_t n); -void int2double_many( void *out, void *in, size_t n); -void int2double_sat_many( void *out, void *in, size_t n); -void float2double_many( void *out, void *in, size_t n); -void float2double_sat_many( void *out, void *in, size_t n); -void double2double_many( void *out, void *in, size_t n); -void double2double_sat_many( void *out, void *in, size_t n); -void ulong2double_many( void *out, void *in, size_t n); -void ulong2double_sat_many( void *out, void *in, size_t n); -void long2double_many( void *out, void *in, size_t n); -void long2double_sat_many( void *out, void *in, size_t n); -void uchar2ulong_many( void *out, void *in, size_t n); -void uchar2ulong_sat_many( void *out, void *in, size_t n); -void char2ulong_many( void *out, void *in, size_t n); -void char2ulong_sat_many( void *out, void *in, size_t n); -void ushort2ulong_many( void *out, void *in, size_t n); -void ushort2ulong_sat_many( void *out, void 
*in, size_t n); -void short2ulong_many( void *out, void *in, size_t n); -void short2ulong_sat_many( void *out, void *in, size_t n); -void uint2ulong_many( void *out, void *in, size_t n); -void uint2ulong_sat_many( void *out, void *in, size_t n); -void int2ulong_many( void *out, void *in, size_t n); -void int2ulong_sat_many( void *out, void *in, size_t n); -void float2ulong_many( void *out, void *in, size_t n); -void float2ulong_sat_many( void *out, void *in, size_t n); -void double2ulong_many( void *out, void *in, size_t n); -void double2ulong_sat_many( void *out, void *in, size_t n); -void ulong2ulong_many( void *out, void *in, size_t n); -void ulong2ulong_sat_many( void *out, void *in, size_t n); -void long2ulong_many( void *out, void *in, size_t n); -void long2ulong_sat_many( void *out, void *in, size_t n); -void uchar2long_many( void *out, void *in, size_t n); -void uchar2long_sat_many( void *out, void *in, size_t n); -void char2long_many( void *out, void *in, size_t n); -void char2long_sat_many( void *out, void *in, size_t n); -void ushort2long_many( void *out, void *in, size_t n); -void ushort2long_sat_many( void *out, void *in, size_t n); -void short2long_many( void *out, void *in, size_t n); -void short2long_sat_many( void *out, void *in, size_t n); -void uint2long_many( void *out, void *in, size_t n); -void uint2long_sat_many( void *out, void *in, size_t n); -void int2long_many( void *out, void *in, size_t n); -void int2long_sat_many( void *out, void *in, size_t n); -void float2long_many( void *out, void *in, size_t n); -void float2long_sat_many( void *out, void *in, size_t n); -void double2long_many( void *out, void *in, size_t n); -void double2long_sat_many( void *out, void *in, size_t n); -void ulong2long_many( void *out, void *in, size_t n); -void ulong2long_sat_many( void *out, void *in, size_t n); -void long2long_many( void *out, void *in, size_t n); -void long2long_sat_many( void *out, void *in, size_t n); - -void uchar2uchar_many( void *out, void 
*in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); } -void uchar2uchar_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); } -void char2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }} -void char2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }} -void ushort2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }} -void short2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }} -void short2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }} -void uint2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }} -void uint2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }} -void int2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }} -void int2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }} -void float2uchar_many( void 
*out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }} -void float2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }} -void double2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }} -void double2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }} -void ulong2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }} -void long2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }} -void long2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }} -void uchar2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }} -void char2char_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); } -void char2char_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); } -void 
ushort2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }} -void short2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }} -void short2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }} -void uint2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }} -void uint2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }} -void int2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }} -void int2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }} -void float2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }} -void float2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }} -void double2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }} -void double2char_sat_many( void *out, void *in, 
size_t n){size_t i; for( i = 0; i < n; i++){ double2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }} -void ulong2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }} -void long2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }} -void long2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }} -void uchar2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }} -void char2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }} -void char2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }} -void ushort2ushort_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); } -void ushort2ushort_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); } -void short2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }} -void 
short2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }} -void uint2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }} -void uint2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }} -void int2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }} -void int2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }} -void float2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }} -void float2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }} -void double2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }} -void double2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }} -void ulong2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * 
sizeof(cl_ulong)); }} -void long2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }} -void long2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }} -void uchar2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }} -void char2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }} -void char2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }} -void ushort2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }} -void short2short_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); } -void short2short_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); } -void uint2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }} -void uint2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short_sat( (char*) out + i * 
sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }} -void int2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }} -void int2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }} -void float2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }} -void float2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }} -void double2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }} -void double2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }} -void ulong2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }} -void long2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }} -void long2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }} -void uchar2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint( (char*) out + i * sizeof(cl_uint), 
(char*) in + i * sizeof(cl_uchar)); }} -void uchar2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_uchar)); }} -void char2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }} -void char2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }} -void ushort2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }} -void short2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }} -void short2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }} -void uint2uint_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); } -void uint2uint_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); } -void int2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }} -void int2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }} -void float2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * 
sizeof(cl_float)); }} -void float2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_float)); }} -void double2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }} -void double2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }} -void ulong2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }} -void long2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }} -void long2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }} -void uchar2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }} -void char2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }} -void char2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }} -void ushort2int_many( 
void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }} -void short2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }} -void short2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }} -void uint2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }} -void uint2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }} -void int2int_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); } -void int2int_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); } -void float2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }} -void float2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }} -void double2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }} -void double2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }} -void ulong2int_many( void *out, void *in, size_t n){size_t i; 
for( i = 0; i < n; i++){ ulong2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }} -void long2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }} -void long2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }} -void uchar2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }} -void char2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }} -void char2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }} -void ushort2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }} -void short2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }} -void short2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ 
short2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }} -void uint2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }} -void uint2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }} -void int2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }} -void int2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }} -void float2float_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); } -void float2float_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); } -void double2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }} -void double2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }} -void ulong2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }} -void long2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }} -void long2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < 
n; i++){ long2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }} -void uchar2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }} -void char2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }} -void char2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }} -void ushort2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }} -void short2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }} -void short2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }} -void uint2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }} -void uint2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }} -void int2double_many( void *out, void *in, size_t 
n){size_t i; for( i = 0; i < n; i++){ int2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }} -void int2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }} -void float2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }} -void float2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }} -void double2double_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); } -void double2double_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); } -void ulong2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }} -void long2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }} -void long2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }} -void uchar2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }} -void 
char2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }} -void char2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }} -void ushort2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }} -void short2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }} -void short2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }} -void uint2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }} -void uint2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }} -void int2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }} -void int2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }} -void float2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }} -void 
float2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }} -void double2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }} -void double2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }} -void ulong2ulong_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); } -void ulong2ulong_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); } -void long2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }} -void long2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }} -void uchar2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }} -void char2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }} -void char2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }} -void ushort2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }} 
-void ushort2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }} -void short2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }} -void short2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }} -void uint2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }} -void uint2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }} -void int2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }} -void int2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }} -void float2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }} -void float2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }} -void double2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }} -void double2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }} -void ulong2long_many( void *out, 
void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }} -void long2long_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); } -void long2long_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); } - -Convert gSaturatedConversions[kTypeCount][kTypeCount] = { - { uchar2uchar_sat_many, char2uchar_sat_many, ushort2uchar_sat_many, short2uchar_sat_many, uint2uchar_sat_many, int2uchar_sat_many, float2uchar_sat_many, double2uchar_sat_many, ulong2uchar_sat_many, long2uchar_sat_many, }, - { uchar2char_sat_many, char2char_sat_many, ushort2char_sat_many, short2char_sat_many, uint2char_sat_many, int2char_sat_many, float2char_sat_many, double2char_sat_many, ulong2char_sat_many, long2char_sat_many, }, - { uchar2ushort_sat_many, char2ushort_sat_many, ushort2ushort_sat_many, short2ushort_sat_many, uint2ushort_sat_many, int2ushort_sat_many, float2ushort_sat_many, double2ushort_sat_many, ulong2ushort_sat_many, long2ushort_sat_many, }, - { uchar2short_sat_many, char2short_sat_many, ushort2short_sat_many, short2short_sat_many, uint2short_sat_many, int2short_sat_many, float2short_sat_many, double2short_sat_many, ulong2short_sat_many, long2short_sat_many, }, - { uchar2uint_sat_many, char2uint_sat_many, ushort2uint_sat_many, short2uint_sat_many, uint2uint_sat_many, int2uint_sat_many, float2uint_sat_many, double2uint_sat_many, ulong2uint_sat_many, long2uint_sat_many, }, - { uchar2int_sat_many, char2int_sat_many, ushort2int_sat_many, short2int_sat_many, uint2int_sat_many, int2int_sat_many, float2int_sat_many, double2int_sat_many, ulong2int_sat_many,long2int_sat_many, }, - { uchar2float_sat_many, char2float_sat_many, ushort2float_sat_many, short2float_sat_many, 
uint2float_sat_many, int2float_sat_many, float2float_sat_many, double2float_sat_many, ulong2float_sat_many, long2float_sat_many, }, - { uchar2double_sat_many, char2double_sat_many, ushort2double_sat_many, short2double_sat_many, uint2double_sat_many, int2double_sat_many, float2double_sat_many, double2double_sat_many, ulong2double_sat_many, long2double_sat_many, }, - { uchar2ulong_sat_many, char2ulong_sat_many, ushort2ulong_sat_many, short2ulong_sat_many, uint2ulong_sat_many, int2ulong_sat_many, float2ulong_sat_many, double2ulong_sat_many, ulong2ulong_sat_many, long2ulong_sat_many, }, - { uchar2long_sat_many, char2long_sat_many, ushort2long_sat_many, short2long_sat_many, uint2long_sat_many, int2long_sat_many, float2long_sat_many, double2long_sat_many, ulong2long_sat_many, long2long_sat_many, }, -}; + if (*name != '\0') return -4; -Convert gConversions[kTypeCount][kTypeCount] = { - { uchar2uchar_many, char2uchar_many, ushort2uchar_many, short2uchar_many, uint2uchar_many, int2uchar_many, float2uchar_many, double2uchar_many, ulong2uchar_many, long2uchar_many, }, - { uchar2char_many, char2char_many, ushort2char_many, short2char_many, uint2char_many, int2char_many, float2char_many, double2char_many, ulong2char_many, long2char_many, }, - { uchar2ushort_many, char2ushort_many, ushort2ushort_many, short2ushort_many, uint2ushort_many, int2ushort_many, float2ushort_many, double2ushort_many, ulong2ushort_many, long2ushort_many, }, - { uchar2short_many, char2short_many, ushort2short_many, short2short_many, uint2short_many, int2short_many, float2short_many, double2short_many, ulong2short_many, long2short_many, }, - { uchar2uint_many, char2uint_many, ushort2uint_many, short2uint_many, uint2uint_many, int2uint_many, float2uint_many, double2uint_many, ulong2uint_many, long2uint_many, }, - { uchar2int_many, char2int_many, ushort2int_many, short2int_many, uint2int_many, int2int_many, float2int_many, double2int_many, ulong2int_many, long2int_many, }, - { uchar2float_many, 
char2float_many, ushort2float_many, short2float_many, uint2float_many, int2float_many, float2float_many, double2float_many, ulong2float_many, long2float_many, }, - { uchar2double_many, char2double_many, ushort2double_many, short2double_many, uint2double_many, int2double_many, float2double_many, double2double_many, ulong2double_many, long2double_many, }, - { uchar2ulong_many, char2ulong_many, ushort2ulong_many, short2ulong_many, uint2ulong_many, int2ulong_many, float2ulong_many, double2ulong_many, ulong2ulong_many, long2ulong_many, }, - { uchar2long_many, char2long_many, ushort2long_many, short2long_many, uint2long_many, int2long_many, float2long_many, double2long_many, ulong2long_many, long2long_many, }, -}; + return 0; +} + +} // namespace conv_test diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index ab887afdd..c1d284ec2 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -16,8 +16,6 @@ #ifndef BASIC_TEST_CONVERSIONS_H #define BASIC_TEST_CONVERSIONS_H -#include "harness/compat.h" - #if !defined(_WIN32) #include #endif @@ -32,23 +30,32 @@ #include #endif + #include "harness/mt19937.h" +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" -typedef void (*Convert)( void *dest, void *src, size_t ); +#include +#include +#include -#define kVectorSizeCount 6 -#define kMaxVectorSize 16 +#include "conversions_data_info.h" -typedef enum -{ - kUnsaturated = 0, - kSaturated, +// typedef void (*Convert)( void *dest, void *src, size_t ); - kSaturationModeCount -}SaturationMode; +#define kVectorSizeCount 6 +#define kMaxVectorSize 16 +#define kPageSize 4096 -extern Convert gConversions[kTypeCount][kTypeCount]; // [dest format][source format] -extern Convert gSaturatedConversions[kTypeCount][kTypeCount]; // [dest format][source format] +#define BUFFER_SIZE (1024 * 1024) +#define EMBEDDED_REDUCTION_FACTOR 16 
+#define PERF_LOOP_COUNT 100 + + +// Retired function-pointer tables, kept commented out for reference; both +// were indexed as [dest format][source format]: +//   extern Convert gConversions[kTypeCount][kTypeCount]; +//   extern Convert gSaturatedConversions[kTypeCount][kTypeCount]; extern const char *gTypeNames[ kTypeCount ]; extern const char *gRoundingModeNames[ kRoundingModeCount ]; // { "", "_rte", "_rtp", "_rtn", "_rtz" } extern const char *gSaturationNames[ kSaturationModeCount ]; // { "", "_sat" } @@ -68,5 +75,354 @@ extern InitDataFunc gInitFunctions[ kTypeCount ]; typedef int (*CheckResults)( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); extern CheckResults gCheckResults[ kTypeCount ]; +#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */) +// Globals of the conversions test; declared here, defined elsewhere. +extern MTdata gMTdata; +extern cl_command_queue gQueue; +extern cl_context gContext; +extern cl_mem gInBuffer; +extern cl_mem gOutBuffers[]; +extern int gHasDouble; +extern int gTestDouble; +extern int gWimpyMode; +extern int gWimpyReductionFactor; +extern int gSkipTesting; +extern int gMinVectorSize; +extern int gMaxVectorSize; +extern int gForceFTZ; +extern int gTimeResults; +extern int gReportAverageTimes; +extern int gStartTestNumber; +extern int gEndTestNumber; +extern int gIsRTZ; +extern void *gIn; +extern void *gRef; +extern void *gAllowZ; +extern void *gOut[]; + +extern const char **argList; +extern int argCount; + +extern const char *sizeNames[]; +extern int vectorSizes[]; + +extern size_t gComputeDevices; +extern uint32_t gDeviceFrequency; + +// Helper routines of the conversions test; only their declarations appear here. +namespace conv_test { + +cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, + RoundingMode round, int vectorSize, + cl_kernel *outKernel); + +int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount); + +int GetTestCase(const char *name, Type *outType, Type *inType, + SaturationMode *sat, RoundingMode *round); + +cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p); +cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void 
*p); +uint64_t GetTime(void); + +void WriteInputBufferComplete(void *); +void *FlushToZero(void); +void UnFlushToZero(void *); +} /* namespace conv_test */ + +/* Per-vector-size record. NOTE(review): virtual method but no virtual destructor - confirm. */ +struct CalcRefValsBase +{ + virtual int check_result(void *, uint32_t, int) { return 0; } + + // pointer back to the parent WriteInputBufferInfo struct + struct WriteInputBufferInfo *parent; + clKernelWrapper kernel; // the kernel for this vector size + clProgramWrapper program; // the program for this vector size + cl_uint vectorSize; // the vector size for this callback chain + void *p; // the pointer to mapped result data for this vector size + cl_int result; +}; + +// Declares a check_result() override on top of CalcRefValsBase; no body here. +template +struct CalcRefValsPat : CalcRefValsBase +{ + int check_result(void *, uint32_t, int) override; +}; + +// State of one conversion run: events, element count, types and calcInfo. +struct WriteInputBufferInfo +{ + WriteInputBufferInfo() + : calcReferenceValues(nullptr), doneBarrier(nullptr), count(0), + outType(kuchar), inType(kuchar), barrierCount(0) + {} + + volatile cl_event + calcReferenceValues; // user event which signals when main thread is + // done calculating reference values + volatile cl_event + doneBarrier; // user event which signals when worker threads are done + cl_uint count; // the number of elements in the array + Type outType; // the data type of the conversion result + Type inType; // the data type of the conversion input + volatile int barrierCount; // NOTE(review): volatile is not thread synchronization - confirm + + std::vector> calcInfo; +}; + + +// Element order must stay aligned with the Type enum! +using TypeIter = std::tuple; + +constexpr bool isTypeFp[] = { 0, 0, 0, 0, 0, 0, 1, 1, 0, 0 }; // 1 marks the floating-point entries + + +// Test fixture that keeps the device, context and queue used to run the +// type-conversion tests. 
+struct ConversionsTest +{ + ConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue); + + virtual cl_int SetUp(int elements); + + // Test body returning an OpenCL error code + virtual cl_int Run(); + + template + int DoTest(Type outType, Type inType, SaturationMode sat, + RoundingMode round); + + template + void TestTypesConversion(const Type &inType, const Type &outType, int &tn); + +protected: + cl_context context; + cl_device_id device; + cl_command_queue queue; + + size_t num_elements; + + TypeIter typeIterator; +}; + + +struct CustomConversionsTest : ConversionsTest +{ + CustomConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : ConversionsTest(device, context, queue) + {} + + cl_int Run() override; +}; + + +template +int MakeAndRunTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + auto test_fixture = T(device, context, queue); + + cl_int error = test_fixture.SetUp(num_elements); + test_error_ret(error, "Error in test initialization", TEST_FAIL); + + return test_fixture.Run(); +} + + +struct TestType +{ + template bool testType(Type in) + { + switch (in) + { + default: return false; + case kuchar: return std::is_same::value; + case kchar: return std::is_same::value; + case kushort: return std::is_same::value; + case kshort: return std::is_same::value; + case kuint: return std::is_same::value; + case kint: return std::is_same::value; + case kfloat: return std::is_same::value; + case kdouble: return std::is_same::value; + case kulong: return std::is_same::value; + case klong: return std::is_same::value; + } + } +}; + + +// Helper structures to iterate over all tuple attributes of different types +struct IterOverTypes : public TestType +{ + IterOverTypes(const TypeIter &typeIter, ConversionsTest &test) + : inType((Type)0), outType((Type)0), typeIter(typeIter), test(test), + testNumber(-1) + {} + + void Run() { for_each_out_elem(typeIter); } + +protected: + 
//////////////////////////////////////////////////////////////////////////////////////// + + template + void iterate_out_type(const OutType &t) + { + for_each_in_elem<0, Out, OutType>(typeIter); + outType = (Type)(outType + 1); + inType = (Type)0; + } + + //////////////////////////////////////////////////////////////////////////////////////// + + template + void iterate_in_type(const InType &t) + { + if (!testType(inType)) vlog_error("Unexpected data type!\n"); + + if (!testType(outType)) vlog_error("Unexpected data type!\n"); + + // run the conversions + test.TestTypesConversion( + inType, outType, testNumber); + inType = (Type)(inType + 1); + } + + //////////////////////////////////////////////////////////////////////////////////////// + + template + inline typename std::enable_if::type + for_each_out_elem( + const std::tuple &) // Unused arguments are given no names. + {} + + //////////////////////////////////////////////////////////////////////////////////////// + + template + inline typename std::enable_if < Out::type + for_each_out_elem(const std::tuple &t) + { + iterate_out_type(std::get(t)); + for_each_out_elem(t); + } + + //////////////////////////////////////////////////////////////////////////////////////// + + template + inline typename std::enable_if::type + for_each_in_elem( + const std::tuple &) // Unused arguments are given no names. 
+ {} + + //////////////////////////////////////////////////////////////////////////////////////// + + template + inline typename std::enable_if < In::type + for_each_in_elem(const std::tuple &t) + { + iterate_in_type(std::get(t)); + for_each_in_elem(t); + } + + //////////////////////////////////////////////////////////////////////////////////////// + +protected: + Type inType; + Type outType; + const TypeIter &typeIter; + ConversionsTest &test; + int testNumber; +}; + + +// Helper structures to select type 2 type conversion test case +struct IterOverSelectedTypes : public TestType +{ + IterOverSelectedTypes(const TypeIter &typeIter, ConversionsTest &test, + const Type &in, const Type &out) + : inType(in), outType(out), typeIter(typeIter), test(test), + testNumber(-1) + {} + + void Run() { for_each_out_elem(typeIter); } + +protected: + //////////////////////////////////////////////////////////////////////////////////////// + + template + void iterate_out_type(const OutType &t) + { + for_each_in_elem<0, Out, OutType>(typeIter); + } + + //////////////////////////////////////////////////////////////////////////////////////// + + template + void iterate_in_type(const InType &t) + { + if (testType(inType) && testType(outType)) + { + // run the conversions + test.TestTypesConversion(inType, outType, + testNumber); + } + } + + //////////////////////////////////////////////////////////////////////////////////////// + + template + inline typename std::enable_if::type + for_each_out_elem(const std::tuple &) + {} + + //////////////////////////////////////////////////////////////////////////////////////// + + template + inline typename std::enable_if < Out::type + for_each_out_elem(const std::tuple &t) + { + iterate_out_type(std::get(t)); + for_each_out_elem(t); + } + + //////////////////////////////////////////////////////////////////////////////////////// + + template + inline typename std::enable_if::type + for_each_in_elem(const std::tuple &) + {} + + 
//////////////////////////////////////////////////////////////////////////////////////// + + template + inline typename std::enable_if < In::type + for_each_in_elem(const std::tuple &t) + { + iterate_in_type(std::get(t)); + for_each_in_elem(t); + } + + //////////////////////////////////////////////////////////////////////////////////////// + +protected: + Type inType; + Type outType; + const TypeIter &typeIter; + ConversionsTest &test; + int testNumber; +}; + + #endif /* BASIC_TEST_CONVERSIONS_H */ diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h new file mode 100644 index 000000000..a4e9c9689 --- /dev/null +++ b/test_conformance/conversions/conversions_data_info.h @@ -0,0 +1,812 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef CONVERSIONS_DATA_INFO_H +#define CONVERSIONS_DATA_INFO_H + +#if defined(__APPLE__) +#include +#else +#include +#endif + +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) +#include "fplib.h" +extern bool qcom_sat; +extern roundingMode qcom_rm; +#endif + +#include "harness/mt19937.h" +#include "harness/rounding_mode.h" +#include "harness/typeWrappers.h" + +#include + +#if defined(__linux__) +#include +#include +#endif + +extern size_t gTypeSizes[kTypeCount]; +extern void *gIn; + + +typedef enum +{ + kUnsaturated = 0, + kSaturated, + + kSaturationModeCount +} SaturationMode; + + +struct DataInitInfo +{ + cl_ulong start; + cl_uint size; + Type outType; + Type inType; + SaturationMode sat; + RoundingMode round; + cl_uint threads; + + + static std::vector specialValuesUInt; + static std::vector specialValuesFloat; + static std::vector specialValuesDouble; +}; + + +struct DataInitBase : public DataInitInfo +{ + DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {} + virtual void conv_array(void *out, void *in, size_t n) {} + virtual void conv_array_sat(void *out, void *in, size_t n) {} + virtual void init(const cl_uint &, const cl_uint &) {} +}; + + +template +struct DataInfoSpec : public DataInitBase +{ + + DataInfoSpec(const DataInitInfo &agg); + + // helpers + float round_to_int(float f); + long long round_to_int_and_clamp(double d); + + OutType absolute(const OutType &x); + + // actual conversion of reference values + void conv(OutType *out, InType *in); + void conv_sat(OutType *out, InType *in); + + // min/max ranges for output type of data + std::pair ranges; + + // matrix of clamping ranges for each rounding type + std::vector> clamp_ranges; + + std::vector mdv; + + //////////////////////////////////////////////////////////////////////////// + void conv_array(void *out, void *in, size_t n) override + { + for (size_t i = 0; i < n; i++) + conv(&((OutType *)out)[i], &((InType *)in)[i]); + } + + 
//////////////////////////////////////////////////////////////////////////// + void conv_array_sat(void *out, void *in, size_t n) override + { + for (size_t i = 0; i < n; i++) + conv_sat(&((OutType *)out)[i], &((InType *)in)[i]); + } + + //////////////////////////////////////////////////////////////////////////// + void init(const cl_uint &, const cl_uint &) override; + InType clamp(const InType &); + //////////////////////////////////////////////////////////////////////////// + inline float fclamp(float lo, float v, float hi) + { + v = v < lo ? lo : v; + return v < hi ? v : hi; + } + //////////////////////////////////////////////////////////////////////////// + inline double dclamp(double lo, double v, double hi) + { + v = v < lo ? lo : v; + return v < hi ? v : hi; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////// + +template +DataInfoSpec::DataInfoSpec( + const DataInitInfo &agg) + : DataInitBase(agg), mdv(0) +{ + if (std::is_same::value) + ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX); + else if (std::is_same::value) + ranges = std::make_pair(0, CL_UCHAR_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX); + else if (std::is_same::value) + ranges = std::make_pair(0, CL_USHRT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(0, CL_UINT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_INT_MIN, CL_INT_MAX); + else if (std::is_same::value) + ranges = std::make_pair(0, CL_ULONG_MAX); + else if (std::is_same::value) + ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX); + + InType outMin = ((InType)ranges.first); + InType outMax = ((InType)ranges.second); + + // clang-format off + // for readability sake keep this section unformatted + if (std::is_floating_point::value) + { // from 
float/double + InType eps = std::is_same::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON; + if (std::is_integral::value) + { // to char/uchar/short/ushort/int/uint/long/ulong + if (sizeof(OutType)<=sizeof(cl_short)) + { // to char/uchar/short/ushort + clamp_ranges= + {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, + {outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, outMax-1.f}, + {outMin-0.0f, outMax - outMax * 0.5f * eps }, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, outMax - outMax * 0.5f * eps}}; + } + else if (std::is_same::value) + { // from float + if (std::is_same::value) + { // to uint + clamp_ranges= + { {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) }, + {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}}; + } + else if (std::is_same::value) + { // to int + clamp_ranges= + { {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) }, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}}; + } + else if (std::is_same::value) + { // to ulong + clamp_ranges= + {{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) }, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}}; + } + else if (std::is_same::value) + { // to long + clamp_ranges= + { {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), 
MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}}; + } + } + else + { // from double + if (std::is_same::value) + { // to uint + clamp_ranges= + { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps}, + {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps}, + {outMin-1.0f+0.5f*eps, outMax}, + {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) }, + {outMin-1.0f+0.5f*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)}}; + } + else if (std::is_same::value) + { // to int + clamp_ranges= + { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}, + {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}, + {outMin-1.0f+outMax*eps, outMax}, + {outMin-0.0f, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps }, + {outMin-1.0f+outMax*eps, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}}; + } + else if (std::is_same::value) + { // to ulong + clamp_ranges= + {{outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) }, + {outMin-1.0f+(std::is_signed::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}}; + } + else if (std::is_same::value) + { // to long + clamp_ranges= + { {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 
0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}}; + } + } + } + } + // clang-format on +} + +//////////////////////////////////////////////////////////////////////////////////////// + +template +float DataInfoSpec::round_to_int(float f) +{ + static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23), + -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) }; + + // Round fractional values to integer in round towards nearest mode + if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23)) + { + volatile float x = f; + float magicVal = magic[f < 0]; + +#if defined(__SSE__) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this + // incorrectly + __m128 v = _mm_set_ss(x); + __m128 m = _mm_set_ss(magicVal); + v = _mm_add_ss(v, m); + v = _mm_sub_ss(v, m); + _mm_store_ss((float *)&x, v); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + return f; +} + +//////////////////////////////////////////////////////////////////////////////////////// + +template +long long +DataInfoSpec::round_to_int_and_clamp(double f) +{ + static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), + MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + + if (f >= -(double)LLONG_MIN) return LLONG_MAX; + + if (f <= (double)LLONG_MIN) return LLONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52)) + { + volatile double x = f; + double magicVal = magic[f < 0]; +#if defined(__SSE2__) || defined(_MSC_VER) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this + // incorrectly + __m128d v = 
_mm_set_sd(x); + __m128d m = _mm_set_sd(magicVal); + v = _mm_add_sd(v, m); + v = _mm_sub_sd(v, m); + _mm_store_sd((double *)&x, v); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + return (long long)f; +} + +//////////////////////////////////////////////////////////////////////////////////////// + +template +OutType DataInfoSpec::absolute(const OutType &x) +{ + union { + cl_uint u; + OutType f; + } u; + u.f = x; + if (std::is_same::value) + u.u &= 0x7fffffff; + else if (std::is_same::value) + u.u &= 0x7fffffffffffffffULL; + else + log_error("Unexpected argument type of DataInfoSpec::absolute"); + + return u.f; +} + +//////////////////////////////////////////////////////////////////////////////////////// + +template +void DataInfoSpec::conv(OutType *out, InType *in) +{ + if (std::is_same::value) + { + cl_float inVal = *in; + + if (std::is_floating_point::value) + { + *out = (OutType)inVal; + } + else if (std::is_same::value) + { +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 (at least) on x86 uses fistp to store the float as a + // 64-bit int. However, fistp stores it as a signed int, and some of + // the test values won't fit into a signed int. (These test values + // are >= 2^63.) The result on VS2005 is that these end up silently + // (at least by default settings) clamped to the max lowest ulong. 
+ cl_float x = round_to_int(inVal); + if (x >= 9223372036854775808.0f) + { + x -= 9223372036854775808.0f; + ((cl_ulong *)out)[0] = x; + ((cl_ulong *)out)[0] += 9223372036854775808ULL; + } + else + { + ((cl_ulong *)out)[0] = x; + } +#else + *out = round_to_int(inVal); +#endif + } + else if (std::is_same::value) + { + *out = round_to_int_and_clamp(inVal); + } + else + *out = round_to_int(inVal); + } + else if (std::is_same::value) + { + if (std::is_same::value) + *out = (OutType)*in; + else + *out = rint(*in); + } + else if (std::is_same::value + || std::is_same::value) + { + if (std::is_same::value) + { +#if defined(_MSC_VER) + cl_ulong l = ((cl_ulong *)in)[0]; + double result; + + if (std::is_same::value) + { + cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) + : (cl_long)l; +#if defined(_M_X64) + _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl)); +#else + result = sl; +#endif + ((double *)out)[0] = + (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result)); + } + else + { + _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l)); + ((double *)out)[0] = + (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's + // always convert to +0.0 + } +#else + *out = (*in == 0 ? 0.0 : (OutType)*in); +#endif + } + else if (std::is_same::value) + { + cl_float outVal = 0.f; + +#if defined(_MSC_VER) && defined(_M_X64) + cl_ulong l = ((cl_ulong *)in)[0]; + float result; + if (std::is_same::value) + { + cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) + : (cl_long)l; + _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); + outVal = (l == 0 ? 0.0f + : (((cl_long)l < 0) ? result * 2.0f : result)); + } + else + { + _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l)); + outVal = (l == 0 ? 
0.0f : result); // Per IEEE-754-2008 5.4.1, + // 0's always convert to +0.0 + } +#else + InType l = ((InType *)in)[0]; +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) + /* ARM VFP doesn't have hardware instruction for converting from + * 64-bit integer to float types, hence GCC ARM uses the + * floating-point emulation code despite which -mfloat-abi setting + * it is. But the emulation code in libgcc.a has only one rounding + * mode (round to nearest even in this case) and ignores the user + * rounding mode setting in hardware. As a result setting rounding + * modes in hardware won't give correct rounding results for type + * convert from 64-bit integer to float using GCC for ARM compiler so + * for testing different rounding modes, we need to use alternative + * reference function. ARM64 does have an instruction, however we + * cannot guarantee the compiler will use it. On all ARM + * architectures use emulation to calculate reference.*/ + if (std::is_same::value) + outVal = qcom_u64_2_f32(l, qcom_sat, qcom_rm); + else + outVal = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm)); +#else + outVal = (l == 0 ? 0.0f : (float)l); // Per IEEE-754-2008 5.4.1, 0's + // always convert to +0.0 +#endif +#endif + + *out = outVal; + } + else + { + *out = (OutType)*in; + } + } + else + { + if (std::is_same::value) + *out = (*in == 0 ? 0.f : *in); // Per IEEE-754-2008 5.4.1, 0's + // always convert to +0.0 + else if (std::is_same::value) + *out = (*in == 0 ? 0.0 : *in); + else + *out = (OutType)*in; + } +} + +//////////////////////////////////////////////////////////////////////////////////////// + +#define CLAMP(_lo, _x, _hi) \ + ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ?
(_hi) : (_x))) + +//////////////////////////////////////////////////////////////////////////////////////// + +template +void DataInfoSpec::conv_sat(OutType *out, + InType *in) +{ + if (std::is_floating_point::value) + { + if (std::is_floating_point::value) + { // in float/double, out float/double + *out = (OutType)(*in); + } + else if ((std::is_same::value) + && std::is_same::value) + { + cl_float x = round_to_int(*in); + +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 (at least) on x86 uses fistp to store the float as a + // 64-bit int. However, fistp stores it as a signed int, and some of + // the test values won't fit into a signed int. (These test values + // are >= 2^63.) The result on VS2005 is that these end up silently + // (at least by default settings) clamped to the max lowest ulong. + if (x >= 18446744073709551616.0f) + { // 2^64 + *out = 0xFFFFFFFFFFFFFFFFULL; + } + else if (x < 0) + { + *out = 0; + } + else if (x >= 9223372036854775808.0f) + { // 2^63 + x -= 9223372036854775808.0f; + *out = x; + *out += 9223372036854775808ULL; + } + else + { + *out = x; + } +#else + *out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) + ? 0xFFFFFFFFFFFFFFFFULL + : x < 0 ? 0 : (OutType)x; +#endif + } + else if ((std::is_same::value) + && std::is_same::value) + { + cl_float f = round_to_int(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) + ? 0x7FFFFFFFFFFFFFFFULL + : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) + ? 0x8000000000000000LL + : (OutType)f; + } + else if (std::is_same::value + && std::is_same::value) + { + InType f = rint(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) + ? 0xFFFFFFFFFFFFFFFFULL + : f < 0 ? 0 : (OutType)f; + } + else if (std::is_same::value + && std::is_same::value) + { + InType f = rint(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) + ? 0x7FFFFFFFFFFFFFFFULL + : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) + ? 
0x8000000000000000LL + : (OutType)f; + } + else + { // in float/double, out char/uchar/short/ushort/int/uint + *out = + CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second); + } + } + else if (std::is_integral::value + && std::is_integral::value) + { + { + if ((std::is_signed::value + && std::is_signed::value) + || (!std::is_signed::value + && !std::is_signed::value)) + { + if (sizeof(InType) <= sizeof(OutType)) + { + *out = (OutType)*in; + } + else + { + *out = CLAMP(ranges.first, *in, ranges.second); + } + } + else + { // mixed signed/unsigned types + if (sizeof(InType) < sizeof(OutType)) + { + *out = (!std::is_signed::value) + ? (OutType)*in + : CLAMP(0, *in, ranges.second); // *in < 0 ? 0 : *in + } + else + { // bigger/equal mixed signed/unsigned types - always clamp + *out = CLAMP(0, *in, ranges.second); + } + } + } + } + else + { // InType integral, OutType floating + *out = std::is_signed::value ? (OutType)*in + : absolute((OutType)*in); + } +} + +//////////////////////////////////////////////////////////////////////////////////////// + +template +void DataInfoSpec::init(const cl_uint &job_id, + const cl_uint &thread_id) +{ + uint64_t ulStart = start; + void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType]; + + if (std::is_integral::value) + { + InType *o = (InType *)pIn; + if (sizeof(InType) <= sizeof(cl_short)) + { // char/uchar/ushort/short + for (int i = 0; i < size; i++) o[i] = ulStart++; + } + else if (sizeof(InType) <= sizeof(cl_int)) + { // int/uint + int i = 0; + if (gIsEmbedded) + for (i = 0; i < size; i++) + o[i] = (InType)genrand_int32(mdv[thread_id]); + else + for (i = 0; i < size; i++) o[i] = (InType)i + ulStart; + + if (0 == ulStart) + { + size_t tableSize = specialValuesUInt.size() + * sizeof(decltype(specialValuesUInt)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesUInt.front(), + tableSize); + } + } + else + { // long/ulong + 
cl_ulong *o = (cl_ulong *)pIn; + cl_ulong i, j, k; + + i = 0; + if (ulStart == 0) + { + // Try various powers of two + for (j = 0; j < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[j] = (cl_ulong)1 << j; + i = j; + + // try the complement of those + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[i++] = ~((cl_ulong)1 << j); + + // Try various negative powers of two + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[i++] = (cl_ulong)0xFFFFFFFFFFFFFFFEULL << j; + + // try various powers of two plus 1, shifted by various amounts + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + for (k = 0; + i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j; + k++) + o[i++] = (((cl_ulong)1 << j) + 1) << k; + + // try various powers of two minus 1 + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + for (k = 0; + i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j; + k++) + o[i++] = (((cl_ulong)1 << j) - 1) << k; + + // Other patterns + cl_ulong pattern[] = { + 0x3333333333333333ULL, 0x5555555555555555ULL, + 0x9999999999999999ULL, 0x6666666666666666ULL, + 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL + }; + cl_ulong mask[] = { 0xffffffffffffffffULL, + 0xff00ff00ff00ff00ULL, + 0xffff0000ffff0000ULL, + 0xffffffff00000000ULL }; + for (j = 0; i < (cl_ulong)size + && j < sizeof(pattern) / sizeof(pattern[0]); + j++) + for (k = 0; i + 2 <= (cl_ulong)size + && k < sizeof(mask) / sizeof(mask[0]); + k++) + { + o[i++] = pattern[j] & mask[k]; + o[i++] = pattern[j] & ~mask[k]; + } + } + + auto &md = mdv[thread_id]; + for (; i < (cl_ulong)size; i++) + o[i] = (cl_ulong)genrand_int32(md) + | ((cl_ulong)genrand_int32(md) << 32); + } + } // integrals + else if (std::is_same::value) + { + cl_uint *o = (cl_uint *)pIn; + int i; + + if (gIsEmbedded) + for (i = 0; i < size; i++) + o[i] = (cl_uint)genrand_int32(mdv[thread_id]); + else + for (i = 0; i < size; i++) o[i] = (cl_uint)i + ulStart; + + if (0 == ulStart) + { + size_t 
tableSize = specialValuesFloat.size() + * sizeof(decltype(specialValuesFloat)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesFloat.front(), + tableSize); + } + + if (kUnsaturated == sat) + { + InType *f = (InType *)pIn; + for (i = 0; i < size; i++) f[i] = clamp(f[i]); + } + } + else if (std::is_same::value) + { + InType *o = (InType *)pIn; + int i = 0; + + union { + uint64_t u; + InType d; + } u; + + for (i = 0; i < size; i++) + { + uint64_t z = i + ulStart; + + uint32_t bits = ((uint32_t)z ^ (uint32_t)(z >> 32)); + // split 0x89abcdef to 0x89abc00000000def + u.u = bits & 0xfffU; + u.u |= (uint64_t)(bits & ~0xfffU) << 32; + // sign extend the leading bit of def segment as sign bit so that + // the middle region consists of either all 1s or 0s + u.u -= (bits & 0x800U) << 1; + o[i] = u.d; + } + + if (0 == ulStart) + { + size_t tableSize = specialValuesDouble.size() + * sizeof(decltype(specialValuesDouble)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesDouble.front(), + tableSize); + } + + if (0 == sat) + for (i = 0; i < size; i++) o[i] = clamp(o[i]); + } +} + +//////////////////////////////////////////////////////////////////////////////////////// + +template +InType DataInfoSpec::clamp(const InType &in) +{ + if (std::is_integral::value) + { + if (std::is_same::value) + { + return fclamp(clamp_ranges[round].first, in, + clamp_ranges[round].second); + } + else if (std::is_same::value) + { + return dclamp(clamp_ranges[round].first, in, + clamp_ranges[round].second); + } + } + return in; +} + +//////////////////////////////////////////////////////////////////////////////////////// + +#endif /* CONVERSIONS_DATA_INFO_H */ diff --git a/test_conformance/conversions/fplib.h b/test_conformance/conversions/fplib.h index 534550a32..c69b1e891 100644 --- 
a/test_conformance/conversions/fplib.h +++ b/test_conformance/conversions/fplib.h @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. // +#ifndef CONVERSIONS_FPLIB_H +#define CONVERSIONS_FPLIB_H + #include #include @@ -28,3 +31,5 @@ typedef enum float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd); float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd); + +#endif diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 2ee05463c..08fcdb473 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -13,12 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include "harness/rounding_mode.h" #include "harness/ThreadPool.h" #include "harness/testHarness.h" -#include "harness/kernelHelpers.h" #include "harness/parseParameters.h" +#include "harness/mt19937.h" + #if defined(__APPLE__) #include #endif @@ -33,7 +33,6 @@ #include #endif -#include "mingw_compat.h" #if defined(__MINGW32__) #include #endif @@ -49,278 +48,75 @@ #include #include +#include +#include #include "Sleep.h" -#include "basic_test_conversions.h" -#if (defined(_WIN32) && defined(_MSC_VER)) -// need for _controlfp_s and rouinding modes in RoundingMode -#include "harness/testHarness.h" -#endif - -#pragma mark - -#pragma mark globals - -#define BUFFER_SIZE (1024 * 1024) -#define kPageSize 4096 -#define EMBEDDED_REDUCTION_FACTOR 16 -#define PERF_LOOP_COUNT 100 +#include "basic_test_conversions.h" +#include +#include -#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */) +#include "harness/mt19937.h" #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) #include "fplib.h" -extern bool qcom_sat; -extern roundingMode qcom_rm; #endif -const char **argList = NULL; -int 
argCount = 0; -cl_context gContext = NULL; -cl_command_queue gQueue = NULL; -char appName[64] = "ctest"; -int gStartTestNumber = -1; -int gEndTestNumber = 0; -#if defined(__APPLE__) -int gTimeResults = 1; -#else -int gTimeResults = 0; +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) +/* Rounding modes and saturation for use with qcom 64 bit to float conversion + * library */ +bool qcom_sat; +roundingMode qcom_rm; #endif -int gReportAverageTimes = 0; -void *gIn = NULL; -void *gRef = NULL; -void *gAllowZ = NULL; -void *gOut[kCallStyleCount] = { NULL }; -cl_mem gInBuffer; -cl_mem gOutBuffers[kCallStyleCount]; -size_t gComputeDevices = 0; -uint32_t gDeviceFrequency = 0; -int gWimpyMode = 0; -int gWimpyReductionFactor = 128; -int gSkipTesting = 0; -int gForceFTZ = 0; -int gMultithread = 1; -int gIsRTZ = 0; -uint32_t gSimdSize = 1; -int gHasDouble = 0; -int gTestDouble = 1; -const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" }; -const int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 }; -int gMinVectorSize = 0; -int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]); -static MTdata gMTdata; - -#pragma mark - -#pragma mark Declarations + static int ParseArgs(int argc, const char **argv); static void PrintUsage(void); test_status InitCL(cl_device_id device); -static int GetTestCase(const char *name, Type *outType, Type *inType, - SaturationMode *sat, RoundingMode *round); -static int DoTest(cl_device_id device, Type outType, Type inType, - SaturationMode sat, RoundingMode round, MTdata d); -static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, - RoundingMode round, int vectorSize, - cl_kernel *outKernel); -static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, - size_t blockCount); - -void *FlushToZero(void); -void UnFlushToZero(void *); - -// Windows (since long double got deprecated) sets the x87 to 53-bit precision -// (that's x87 default state). 
This causes problems with the tests that -// convert long and ulong to float and double or otherwise deal with values -// that need more precision than 53-bit. So, set the x87 to 64-bit precision. -static inline void Force64BitFPUPrecision(void) -{ -#if __MINGW32__ - // The usual method is to use _controlfp as follows: - // #include - // _controlfp(_PC_64, _MCW_PC); - // - // _controlfp is available on MinGW32 but not on MinGW64. Instead of having - // divergent code just use inline assembly which works for both. - unsigned short int orig_cw = 0; - unsigned short int new_cw = 0; - __asm__ __volatile__("fstcw %0" : "=m"(orig_cw)); - new_cw = orig_cw | 0x0300; // set precision to 64-bit - __asm__ __volatile__("fldcw %0" ::"m"(new_cw)); -#else - /* Implement for other platforms if needed */ -#endif -} -int test_conversions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - int error, i, testNumber = -1; - int startMinVectorSize = gMinVectorSize; - Type inType, outType; - RoundingMode round; - SaturationMode sat; - if (argCount) - { - for (i = 0; i < argCount; i++) - { - if (GetTestCase(argList[i], &outType, &inType, &sat, &round)) - { - vlog_error("\n\t\t**** ERROR: Unable to parse function name " - "%s. Skipping.... 
*****\n\n", - argList[i]); - continue; - } +const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short", + "uint", "int", "float", "double", + "ulong", "long" }; - // skip double if we don't have it - if (!gTestDouble && (inType == kdouble || outType == kdouble)) - { - if (gHasDouble) - { - vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", - gTypeNames[outType], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType]); - vlog("\t\tcl_khr_fp64 enabled, but double testing turned " - "off.\n"); - } +const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp", + "_rtn", "_rtz" }; - continue; - } +const char *gSaturationNames[2] = { "", "_sat" }; - // skip longs on embedded - if (!gHasLong - && (inType == klong || outType == klong || inType == kulong - || outType == kulong)) - { - continue; - } +size_t gTypeSizes[kTypeCount] = { + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), sizeof(cl_double), + sizeof(cl_ulong), sizeof(cl_long), +}; - // Skip the implicit converts if the rounding mode is not default or - // test is saturated - if (0 == startMinVectorSize) - { - if (sat || round != kDefaultRoundingMode) - gMinVectorSize = 1; - else - gMinVectorSize = 0; - } +char appName[64] = "ctest"; +int gMultithread = 1; - if ((error = DoTest(device, outType, inType, sat, round, gMTdata))) - { - vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", - gTypeNames[outType], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType]); - } - } + +int test_conversions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (argCount) + { + return MakeAndRunTest(device, context, queue, + num_elements); } else { - for (outType = (Type)0; outType < kTypeCount; - outType = (Type)(outType + 1)) - { - for (inType = (Type)0; inType < kTypeCount; - inType = (Type)(inType + 1)) - { - // skip longs on embedded - if 
(!gHasLong - && (inType == klong || outType == klong || inType == kulong - || outType == kulong)) - { - continue; - } - - for (sat = (SaturationMode)0; sat < kSaturationModeCount; - sat = (SaturationMode)(sat + 1)) - { - // skip illegal saturated conversions to float type - if (kSaturated == sat - && (outType == kfloat || outType == kdouble)) - { - continue; - } - - for (round = (RoundingMode)0; round < kRoundingModeCount; - round = (RoundingMode)(round + 1)) - { - if (++testNumber < gStartTestNumber) - { - // vlog( "%d) skipping convert_%sn%s%s( %sn - // )\n", testNumber, gTypeNames[ outType ], - // gSaturationNames[ sat ], - // gRoundingModeNames[round], gTypeNames[inType] - // ); - continue; - } - else - { - if (gEndTestNumber > 0 - && testNumber >= gEndTestNumber) - { - goto exit; - } - } - - vlog("%d) Testing convert_%sn%s%s( %sn ):\n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], gRoundingModeNames[round], - gTypeNames[inType]); - - // skip double if we don't have it - if (!gTestDouble - && (inType == kdouble || outType == kdouble)) - { - if (gHasDouble) - { - vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " - "FAILED ** \n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], - gRoundingModeNames[round], - gTypeNames[inType]); - vlog("\t\tcl_khr_fp64 enabled, but double " - "testing turned off.\n"); - } - continue; - } - - // Skip the implicit converts if the rounding mode is - // not default or test is saturated - if (0 == startMinVectorSize) - { - if (sat || round != kDefaultRoundingMode) - gMinVectorSize = 1; - else - gMinVectorSize = 0; - } - - if ((error = DoTest(device, outType, inType, sat, round, - gMTdata))) - { - vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " - "FAILED ** \n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], - gRoundingModeNames[round], - gTypeNames[inType]); - } - } - } - } - } + return MakeAndRunTest(device, context, queue, + num_elements); } - -exit: - return gFailCount; } + test_definition 
test_list[] = { ADD_TEST(conversions), }; const int test_num = ARRAY_SIZE(test_list); -#pragma mark - int main(int argc, const char **argv) { @@ -378,8 +174,6 @@ int main(int argc, const char **argv) return ret; } -#pragma mark - -#pragma mark setup static int ParseArgs(int argc, const char **argv) { @@ -509,7 +303,7 @@ static int ParseArgs(int argc, const char **argv) gWimpyMode = 1; } - vlog( "\n" ); + vlog("\n"); PrintArch(); @@ -526,6 +320,7 @@ static int ParseArgs(int argc, const char **argv) return 0; } + static void PrintUsage(void) { int i; @@ -564,63 +359,6 @@ static void PrintUsage(void) } -static int GetTestCase(const char *name, Type *outType, Type *inType, - SaturationMode *sat, RoundingMode *round) -{ - int i; - - // Find the return type - for (i = 0; i < kTypeCount; i++) - if (name == strstr(name, gTypeNames[i])) - { - *outType = (Type)i; - name += strlen(gTypeNames[i]); - - break; - } - - if (i == kTypeCount) return -1; - - // Check to see if _sat appears next - *sat = (SaturationMode)0; - for (i = 1; i < kSaturationModeCount; i++) - if (name == strstr(name, gSaturationNames[i])) - { - *sat = (SaturationMode)i; - name += strlen(gSaturationNames[i]); - break; - } - - *round = (RoundingMode)0; - for (i = 1; i < kRoundingModeCount; i++) - if (name == strstr(name, gRoundingModeNames[i])) - { - *round = (RoundingMode)i; - name += strlen(gRoundingModeNames[i]); - break; - } - - if (*name != '_') return -2; - name++; - - for (i = 0; i < kTypeCount; i++) - if (name == strstr(name, gTypeNames[i])) - { - *inType = (Type)i; - name += strlen(gTypeNames[i]); - - break; - } - - if (i == kTypeCount) return -3; - - if (*name != '\0') return -4; - - return 0; -} - -#pragma mark - -#pragma mark OpenCL test_status InitCL(cl_device_id device) { @@ -678,6 +416,20 @@ test_status InitCL(cl_device_id device) } gTestDouble &= gHasDouble; + // detect whether profile of the device is embedded + char profile[1024] = ""; + if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, 
sizeof(profile), + profile, NULL))) + { + vlog_error("clGetDeviceInfo failed. (%d)\n", error); + return TEST_FAIL; + } + else if (strstr(profile, "EMBEDDED_PROFILE")) + { + gIsEmbedded = 1; + if (!is_extension_available(device, "cles_khr_int64")) gHasLong = 0; + } + gContext = clCreateContext(NULL, 1, &device, notify_callback, NULL, &error); if (NULL == gContext || error) { @@ -726,10 +478,8 @@ test_status InitCL(cl_device_id device) } } - gMTdata = init_genrand(gRandomSeed); - char c[1024]; static const char *no_yes[] = { "NO", "YES" }; vlog("\nCompute Device info:\n"); @@ -760,977 +510,4 @@ test_status InitCL(cl_device_id device) return TEST_PASS; } -static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, - size_t blockCount) -{ - // The global dimensions are just the blockCount to execute since we haven't - // set up multiple queues for multiple devices. - int error; - - error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf); - error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf); - - if (error) - { - vlog_error("FAILED -- could not set kernel args (%d)\n", error); - return error; - } - - if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount, - NULL, 0, NULL, NULL))) - { - vlog_error("FAILED -- could not execute kernel (%d)\n", error); - return error; - } - - return 0; -} - -#if defined(__APPLE__) -#include -#endif - -uint64_t GetTime(void); -uint64_t GetTime(void) -{ -#if defined(__APPLE__) - return mach_absolute_time(); -#elif defined(_MSC_VER) - return ReadTime(); -#else - // mach_absolute_time is a high precision timer with precision < 1 - // microsecond. -#warning need accurate clock here. Times are invalid. 
- return 0; -#endif -} - - -#if defined(_MSC_VER) -/* function is defined in "compat.h" */ -#else -double SubtractTime(uint64_t endTime, uint64_t startTime); -double SubtractTime(uint64_t endTime, uint64_t startTime) -{ - uint64_t diff = endTime - startTime; - static double conversion = 0.0; - - if (0.0 == conversion) - { -#if defined(__APPLE__) - mach_timebase_info_data_t info = { 0, 0 }; - kern_return_t err = mach_timebase_info(&info); - if (0 == err) - conversion = 1e-9 * (double)info.numer / (double)info.denom; -#else - // This function consumes output from GetTime() above, and converts the - // time to secionds. -#warning need accurate ticks to seconds conversion factor here. Times are invalid. -#endif - } - - // strictly speaking we should also be subtracting out timer latency here - return conversion * (double)diff; -} -#endif - -typedef struct CalcReferenceValuesInfo -{ - struct WriteInputBufferInfo - *parent; // pointer back to the parent WriteInputBufferInfo struct - cl_kernel kernel; // the kernel for this vector size - cl_program program; // the program for this vector size - cl_uint vectorSize; // the vector size for this callback chain - void *p; // the pointer to mapped result data for this vector size - cl_int result; -} CalcReferenceValuesInfo; - -typedef struct WriteInputBufferInfo -{ - volatile cl_event - calcReferenceValues; // user event which signals when main thread is - // done calculating reference values - volatile cl_event - doneBarrier; // user event which signals when worker threads are done - cl_uint count; // the number of elements in the array - Type outType; // the data type of the conversion result - Type inType; // the data type of the conversion input - volatile int barrierCount; - CalcReferenceValuesInfo calcInfo[kCallStyleCount]; -} WriteInputBufferInfo; - -cl_uint RoundUpToNextPowerOfTwo(cl_uint x); -cl_uint RoundUpToNextPowerOfTwo(cl_uint x) -{ - if (0 == (x & (x - 1))) return x; - - while (x & (x - 1)) x &= x - 1; - - return 
x + x; -} - -void WriteInputBufferComplete(void *); - -typedef struct DataInitInfo -{ - cl_ulong start; - cl_uint size; - Type outType; - Type inType; - SaturationMode sat; - RoundingMode round; - MTdata *d; -} DataInitInfo; - -cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p); -cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) -{ - DataInitInfo *info = (DataInitInfo *)p; - - gInitFunctions[info->inType]( - (char *)gIn + job_id * info->size * gTypeSizes[info->inType], info->sat, - info->round, info->outType, info->start + job_id * info->size, - info->size, info->d[thread_id]); - return CL_SUCCESS; -} - -static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) -{ - cl_uint i; - for (i = 0; i < count; ++i) - allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0); -} - -cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p); -cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) -{ - DataInitInfo *info = (DataInitInfo *)p; - cl_uint count = info->size; - Type inType = info->inType; - Type outType = info->outType; - RoundingMode round = info->round; - size_t j; - - Force64BitFPUPrecision(); - - void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType]; - void *a = (cl_uchar *)gAllowZ + job_id * count; - void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType]; - if (outType != inType) - { - // create the reference while we wait - Convert f = gConversions[outType][inType]; - if (info->sat) f = gSaturatedConversions[outType][inType]; - -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point - * emulation code despite which -mfloat-abi setting it is. But the - * emulation code in libgcc.a has only one rounding mode (round to - * nearest even in this case) and ignores the user rounding mode setting - * in hardware. 
As a result setting rounding modes in hardware won't - * give correct rounding results for type covert from 64-bit integer to - * float using GCC for ARM compiler so for testing different rounding - * modes, we need to use alternative reference function. ARM64 does have - * an instruction, however we cannot guarantee the compiler will use it. - * On all ARM architechures use emulation to calculate reference.*/ - switch (round) - { - /* conversions to floating-point type use the current rounding mode. - * The only default floating-point rounding mode supported is round - * to nearest even i.e the current rounding mode will be _rte for - * floating-point types. */ - case kDefaultRoundingMode: qcom_rm = qcomRTE; break; - case kRoundToNearestEven: qcom_rm = qcomRTE; break; - case kRoundUp: qcom_rm = qcomRTP; break; - case kRoundDown: qcom_rm = qcomRTN; break; - case kRoundTowardZero: qcom_rm = qcomRTZ; break; - default: - vlog_error("ERROR: undefined rounding mode %d\n", round); - break; - } - qcom_sat = info->sat; -#endif - - RoundingMode oldRound = set_round(round, outType); - f(d, s, count); - set_round(oldRound, outType); - - // Decide if we allow a zero result in addition to the correctly rounded - // one - memset(a, 0, count); - if (gForceFTZ) - { - if (inType == kfloat) setAllowZ((uint8_t *)a, (uint32_t *)s, count); - if (outType == kfloat) - setAllowZ((uint8_t *)a, (uint32_t *)d, count); - } - } - else - { - // Copy the input to the reference - memcpy(d, s, info->size * gTypeSizes[inType]); - } - - // Patch up NaNs conversions to integer to zero -- these can be converted to - // any integer - if (info->outType != kfloat && info->outType != kdouble) - { - if (inType == kfloat) - { - float *inp = (float *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)d + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - if (inType == kdouble) - { - double *inp = (double *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - 
memset((char *)d + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - } - else if (inType == kfloat || inType == kdouble) - { // outtype and intype is float or double. NaN conversions for float <-> - // double can be any NaN - if (inType == kfloat && outType == kdouble) - { - float *inp = (float *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) ((double *)d)[j] = NAN; - } - } - if (inType == kdouble && outType == kfloat) - { - double *inp = (double *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) ((float *)d)[j] = NAN; - } - } - } - - return CL_SUCCESS; -} - -static int DoTest(cl_device_id device, Type outType, Type inType, - SaturationMode sat, RoundingMode round, MTdata d) -{ -#ifdef __APPLE__ - cl_ulong wall_start = mach_absolute_time(); -#endif - - DataInitInfo init_info = { 0, 0, outType, inType, sat, round, NULL }; - WriteInputBufferInfo writeInputBufferInfo; - int vectorSize; - int error = 0; - cl_uint threads = GetThreadCount(); - uint64_t i; - - gTestCount++; - size_t blockCount = - BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); - size_t step = blockCount; - uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]); - - memset(&writeInputBufferInfo, 0, sizeof(writeInputBufferInfo)); - init_info.d = (MTdata *)malloc(threads * sizeof(MTdata)); - if (NULL == init_info.d) - { - vlog_error( - "ERROR: Unable to allocate storage for random number generator!\n"); - return -1; - } - for (i = 0; i < threads; i++) - { - init_info.d[i] = init_genrand(genrand_int32(d)); - if (NULL == init_info.d[i]) - { - vlog_error("ERROR: Unable to allocate storage for random number " - "generator!\n"); - return -1; - } - } - - writeInputBufferInfo.outType = outType; - writeInputBufferInfo.inType = inType; - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - writeInputBufferInfo.calcInfo[vectorSize].program = - MakeProgram(outType, inType, sat, round, vectorSize, - 
&writeInputBufferInfo.calcInfo[vectorSize].kernel); - if (NULL == writeInputBufferInfo.calcInfo[vectorSize].program) - { - gFailCount++; - return -1; - } - if (NULL == writeInputBufferInfo.calcInfo[vectorSize].kernel) - { - gFailCount++; - vlog_error("\t\tFAILED -- Failed to create kernel.\n"); - return -2; - } - - writeInputBufferInfo.calcInfo[vectorSize].parent = - &writeInputBufferInfo; - writeInputBufferInfo.calcInfo[vectorSize].vectorSize = vectorSize; - writeInputBufferInfo.calcInfo[vectorSize].result = -1; - } - - if (gSkipTesting) goto exit; - - // Patch up rounding mode if default is RTZ - // We leave the part above in default rounding mode so that the right kernel - // is compiled. - if (round == kDefaultRoundingMode && gIsRTZ && (outType == kfloat)) - init_info.round = round = kRoundTowardZero; - - // Figure out how many elements are in a work block - - // we handle 64-bit types a bit differently. - if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL; - - if (!gWimpyMode && gIsEmbedded) - step = blockCount * EMBEDDED_REDUCTION_FACTOR; - - if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor; - vlog("Testing... "); - fflush(stdout); - for (i = 0; i < (uint64_t)lastCase; i += step) - { - - if (0 == (i & ((lastCase >> 3) - 1))) - { - vlog("."); - fflush(stdout); - } - - cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); - writeInputBufferInfo.count = count; - - // Crate a user event to represent the status of the reference value - // computation completion - writeInputBufferInfo.calcReferenceValues = - clCreateUserEvent(gContext, &error); - if (error || NULL == writeInputBufferInfo.calcReferenceValues) - { - vlog_error("ERROR: Unable to create user event. 
(%d)\n", error); - gFailCount++; - goto exit; - } - - // retain for consumption by MapOutputBufferComplete - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - if ((error = - clRetainEvent(writeInputBufferInfo.calcReferenceValues))) - { - vlog_error("ERROR: Unable to retain user event. (%d)\n", error); - gFailCount++; - goto exit; - } - } - - // Crate a user event to represent when the callbacks are done verifying - // correctness - writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error); - if (error || NULL == writeInputBufferInfo.calcReferenceValues) - { - vlog_error("ERROR: Unable to create user event for barrier. (%d)\n", - error); - gFailCount++; - goto exit; - } - - // retain for use by the callback that calls this - if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier))) - { - vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n", - error); - gFailCount++; - goto exit; - } - - // Call this in a multithreaded manner - // gInitFunctions[ inType ]( gIn, sat, round, outType, i, count, d - // ); - cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2; - init_info.start = i; - init_info.size = count / chunks; - if (init_info.size < 16384) - { - chunks = RoundUpToNextPowerOfTwo(threads); - init_info.size = count / chunks; - if (init_info.size < 16384) - { - init_info.size = count; - chunks = 1; - } - } - ThreadPool_Do(InitData, chunks, &init_info); - - // Copy the results to the device - if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, - count * gTypeSizes[inType], gIn, 0, - NULL, NULL))) - { - vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error); - gFailCount++; - goto exit; - } - - // Call completion callback for the write, which will enqueue the rest - // of the work. 
- WriteInputBufferComplete((void *)&writeInputBufferInfo); - - // Make sure the work is actually running, so we don't deadlock - if ((error = clFlush(gQueue))) - { - vlog_error("clFlush failed with error %d\n", error); - gFailCount++; - goto exit; - } - - ThreadPool_Do(PrepareReference, chunks, &init_info); - - // signal we are done calculating the reference results - if ((error = clSetUserEventStatus( - writeInputBufferInfo.calcReferenceValues, CL_COMPLETE))) - { - vlog_error( - "Error: Failed to set user event status to CL_COMPLETE: %d\n", - error); - gFailCount++; - goto exit; - } - - // Wait for the event callbacks to finish verifying correctness. - if ((error = clWaitForEvents( - 1, (cl_event *)&writeInputBufferInfo.doneBarrier))) - { - vlog_error("Error: Failed to wait for barrier: %d\n", error); - gFailCount++; - goto exit; - } - - if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues))) - { - vlog_error("Error: Failed to release calcReferenceValues: %d\n", - error); - gFailCount++; - goto exit; - } - - if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier))) - { - vlog_error("Error: Failed to release done barrier: %d\n", error); - gFailCount++; - goto exit; - } - - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - if ((error = writeInputBufferInfo.calcInfo[vectorSize].result)) - { - switch (inType) - { - case kuchar: - case kchar: - vlog("Input value: 0x%2.2x ", - ((unsigned char *)gIn)[error - 1]); - break; - case kushort: - case kshort: - vlog("Input value: 0x%4.4x ", - ((unsigned short *)gIn)[error - 1]); - break; - case kuint: - case kint: - vlog("Input value: 0x%8.8x ", - ((unsigned int *)gIn)[error - 1]); - break; - case kfloat: - vlog("Input value: %a ", ((float *)gIn)[error - 1]); - break; - break; - case kulong: - case klong: - vlog("Input value: 0x%16.16llx ", - ((unsigned long long *)gIn)[error - 1]); - break; - case kdouble: - vlog("Input value: %a ", ((double *)gIn)[error - 1]); - 
break; - default: - vlog_error("Internal error at %s: %d\n", __FILE__, - __LINE__); - abort(); - break; - } - - // tell the user which conversion it was. - if (0 == vectorSize) - vlog(" (implicit scalar conversion from %s to %s)\n", - gTypeNames[inType], gTypeNames[outType]); - else - vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType], - sizeNames[vectorSize], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType], - sizeNames[vectorSize]); - - gFailCount++; - goto exit; - } - } - } - - log_info("done.\n"); - - if (gTimeResults) - { - // Kick off tests for the various vector lengths - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - size_t workItemCount = blockCount / vectorSizes[vectorSize]; - if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4) - workItemCount /= - 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]); - - double sum = 0.0; - double bestTime = INFINITY; - cl_uint k; - for (k = 0; k < PERF_LOOP_COUNT; k++) - { - uint64_t startTime = GetTime(); - if ((error = RunKernel( - writeInputBufferInfo.calcInfo[vectorSize].kernel, - gInBuffer, gOutBuffers[vectorSize], workItemCount))) - { - gFailCount++; - goto exit; - } - - // Make sure OpenCL is done - if ((error = clFinish(gQueue))) - { - vlog_error("Error %d at clFinish\n", error); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime(endTime, startTime); - sum += time; - if (time < bestTime) bestTime = time; - } - - if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double)gDeviceFrequency - * gComputeDevices * gSimdSize * 1e6 - / (workItemCount * vectorSizes[vectorSize]); - if (0 == vectorSize) - vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", - "implicit convert %s -> %s", gTypeNames[inType], - gTypeNames[outType]); - else - vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", - "convert_%s%s%s%s( %s%s )", gTypeNames[outType], - sizeNames[vectorSize], 
gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType], - sizeNames[vectorSize]); - } - } - - if (gWimpyMode) - vlog("\tWimp pass"); - else - vlog("\tpassed"); - -#ifdef __APPLE__ - // record the run time - vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start)); -#endif - vlog("\n\n"); - fflush(stdout); - - -exit: - // clean up - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - clReleaseProgram(writeInputBufferInfo.calcInfo[vectorSize].program); - clReleaseKernel(writeInputBufferInfo.calcInfo[vectorSize].kernel); - } - - if (init_info.d) - { - for (i = 0; i < threads; i++) free_mtdata(init_info.d[i]); - free(init_info.d); - } - - return error; -} - -void MapResultValuesComplete(void *data); - -// Note: not called reentrantly -void WriteInputBufferComplete(void *data) -{ - cl_int status; - WriteInputBufferInfo *info = (WriteInputBufferInfo *)data; - cl_uint count = info->count; - int vectorSize; - - info->barrierCount = gMaxVectorSize - gMinVectorSize; - - // now that we know that the write buffer is complete, enqueue callbacks to - // wait for the main thread to finish calculating the reference results. 
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - size_t workItemCount = - (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]); - - if ((status = RunKernel(info->calcInfo[vectorSize].kernel, gInBuffer, - gOutBuffers[vectorSize], workItemCount))) - { - gFailCount++; - return; - } - - info->calcInfo[vectorSize].p = clEnqueueMapBuffer( - gQueue, gOutBuffers[vectorSize], CL_TRUE, - CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0, - NULL, NULL, &status); - { - if (status) - { - vlog_error("ERROR: WriteInputBufferComplete calback failed " - "with status: %d\n", - status); - gFailCount++; - return; - } - } - } - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - MapResultValuesComplete(info->calcInfo + vectorSize); - } - - // Make sure the work starts moving -- otherwise we may deadlock - if ((status = clFlush(gQueue))) - { - vlog_error( - "ERROR: WriteInputBufferComplete calback failed with status: %d\n", - status); - gFailCount++; - return; - } - - // e was already released by the main thread. It should be destroyed - // automatically soon after we exit. 
-} - -void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, - void *data); - -// Note: May be called reentrantly -void MapResultValuesComplete(void *data) -{ - cl_int status; - CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data; - cl_event calcReferenceValues = info->parent->calcReferenceValues; - - // we know that the map is done, wait for the main thread to finish - // calculating the reference values - if ((status = clSetEventCallback(calcReferenceValues, CL_COMPLETE, - CalcReferenceValuesComplete, data))) - { - vlog_error("ERROR: clSetEventCallback failed in " - "MapResultValuesComplete with status: %d\n", - status); - gFailCount++; // not thread safe -- being lazy here - } - - // this thread no longer needs its reference to info->calcReferenceValues, - // so release it - if ((status = clReleaseEvent(calcReferenceValues))) - { - vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed " - "with status: %d\n", - status); - gFailCount++; // not thread safe -- being lazy here - } - - // no need to flush since we didn't enqueue anything - - // e was already released by WriteInputBufferComplete. It should be - // destroyed automatically soon after we exit. -} - - -void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, - void *data) -{ - CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data; - cl_uint vectorSize = info->vectorSize; - cl_uint count = info->parent->count; - Type outType = - info->parent->outType; // the data type of the conversion result - Type inType = info->parent->inType; // the data type of the conversion input - size_t j; - cl_int error; - cl_event doneBarrier = info->parent->doneBarrier; - - // report spurious error condition - if (CL_SUCCESS != status) - { - vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! 
(%d)\n", - status); - gFailCount++; // lazy about thread safety here - return; - } - - // Now we know that both results have been mapped back from the device, and - // the main thread is done calculating the reference results. It is now time - // to check the results. - - // verify results - void *mapped = info->p; - - // Patch up NaNs conversions to integer to zero -- these can be converted to - // any integer - if (outType != kfloat && outType != kdouble) - { - if (inType == kfloat) - { - float *inp = (float *)gIn; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)mapped + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - if (inType == kdouble) - { - double *inp = (double *)gIn; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)mapped + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - } - else if (inType == kfloat || inType == kdouble) - { // outtype and intype is float or double. NaN conversions for float <-> - // double can be any NaN - if (inType == kfloat && outType == kdouble) - { - float *inp = (float *)gIn; - double *outp = (double *)mapped; - for (j = 0; j < count; j++) - { - if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; - } - } - if (inType == kdouble && outType == kfloat) - { - double *inp = (double *)gIn; - float *outp = (float *)mapped; - for (j = 0; j < count; j++) - { - if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; - } - } - } - - if (memcmp(mapped, gRef, count * gTypeSizes[outType])) - info->result = gCheckResults[outType](mapped, gRef, gAllowZ, count, - vectorSizes[vectorSize]); - else - info->result = 0; - - // Fill the output buffer with junk and release it - { - cl_uint pattern = 0xffffdead; - memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]); - if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize], - mapped, 0, NULL, NULL))) - { - vlog_error("ERROR: clEnqueueUnmapMemObject failed in " - "CalcReferenceValuesComplete (%d)\n", - error); - 
gFailCount++; - } - } - - if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1)) - { - if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE))) - { - vlog_error("ERROR: clSetUserEventStatus failed in " - "CalcReferenceValuesComplete (err: %d). We're probably " - "going to deadlock.\n", - status); - gFailCount++; - return; - } - - if ((status = clReleaseEvent(doneBarrier))) - { - vlog_error("ERROR: clReleaseEvent failed in " - "CalcReferenceValuesComplete (err: %d).\n", - status); - gFailCount++; - return; - } - } - // e was already released by WriteInputBufferComplete. It should be - // destroyed automatically soon after all the calls to - // CalcReferenceValuesComplete exit. -} - -static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, - RoundingMode round, int vectorSize, - cl_kernel *outKernel) -{ - cl_program program; - char testName[256]; - int error = 0; - - std::ostringstream source; - if (outType == kdouble || inType == kdouble) - source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - - // Create the program. This is a bit complicated because we are trying to - // avoid byte and short stores. - if (0 == vectorSize) - { - // Create the type names. - char inName[32]; - char outName[32]; - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - sprintf(testName, "test_implicit_%s_%s", outName, inName); - - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " dest[i] = src[i];\n"; - source << "}\n"; - - vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], - gTypeNames[outType]); - fflush(stdout); - } - else - { - int vectorSizetmp = vectorSizes[vectorSize]; - - // Create the type names. 
- char convertString[128]; - char inName[32]; - char outName[32]; - switch (vectorSizetmp) - { - case 1: - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - snprintf(convertString, sizeof(convertString), "convert_%s%s%s", - outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s", convertString, inName); - vlog("Building %s( %s ) test\n", convertString, inName); - break; - case 3: - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - snprintf(convertString, sizeof(convertString), - "convert_%s3%s%s", outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s3", convertString, inName); - vlog("Building %s( %s3 ) test\n", convertString, inName); - break; - default: - snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType], - vectorSizetmp); - snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType], - vectorSizetmp); - snprintf(convertString, sizeof(convertString), "convert_%s%s%s", - outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s", convertString, inName); - vlog("Building %s( %s ) test\n", convertString, inName); - break; - } - fflush(stdout); - - if (vectorSizetmp == 3) - { - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " if( i + 1 < get_global_size(0))\n"; - source << " vstore3( " << convertString - << "( vload3( i, src)), i, dest );\n"; - source << " else\n"; - source << " {\n"; - source << " " << inName << "3 in;\n"; - source << " " << outName << "3 out;\n"; - source << " if( 0 == (i & 1) )\n"; - source << " in.y = src[3*i+1];\n"; - source << " in.x = src[3*i];\n"; - source << " out = " << convertString << "( in ); \n"; - source << " dest[3*i] = 
out.x;\n"; - source << " if( 0 == (i & 1) )\n"; - source << " dest[3*i+1] = out.y;\n"; - source << " }\n"; - source << "}\n"; - } - else - { - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " dest[i] = " << convertString << "( src[i] );\n"; - source << "}\n"; - } - } - *outKernel = NULL; - - const char *flags = NULL; - if (gForceFTZ) flags = "-cl-denorms-are-zero"; - - // build it - std::string sourceString = source.str(); - const char *programSource = sourceString.c_str(); - error = create_single_kernel_helper(gContext, &program, outKernel, 1, - &programSource, testName, flags); - if (error) - { - vlog_error("Failed to build kernel/program (err = %d).\n", error); - clReleaseProgram(program); - return NULL; - } - - return program; -} From 4dbfba8865674906c852d590c96ec427c91b1a2a Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Wed, 24 May 2023 09:27:08 +0200 Subject: [PATCH 2/8] Added missing virtual descructor --- test_conformance/conversions/basic_test_conversions.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index c1d284ec2..9064a7afb 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -133,6 +133,7 @@ void UnFlushToZero(void *); struct CalcRefValsBase { + virtual ~CalcRefValsBase() = default; virtual int check_result(void *, uint32_t, int) { return 0; } // pointer back to the parent WriteInputBufferInfo struct From 76b9177c93b994f6c75cdf421f853d8c459a4636 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 9 Jun 2023 09:24:36 +0200 Subject: [PATCH 3/8] Added corrections due to code review --- .../conversions/basic_test_conversions.cpp | 12 ----- .../conversions/basic_test_conversions.h | 50 ++----------------- .../conversions/conversions_data_info.h | 5 +- .../conversions/test_conversions.cpp | 7 +-- 4 files changed, 7 insertions(+), 67 deletions(-) diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index a01f60015..1fff7cb49 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -15,12 +15,7 @@ // #include "harness/testHarness.h" #include "harness/compat.h" -#include "harness/rounding_mode.h" #include "harness/ThreadPool.h" -#include "harness/testHarness.h" -#include "harness/kernelHelpers.h" -#include "harness/mt19937.h" -#include "harness/kernelHelpers.h" #if defined(__APPLE__) #include @@ -43,8 +38,6 @@ #include #include -#include -#include #if !defined(_WIN32) #include #include @@ -58,11 +51,6 @@ #include "basic_test_conversions.h" -#if (defined(_WIN32) && defined(_MSC_VER)) -// need for _controlfp_s and rouinding modes in RoundingMode -#include "harness/testHarness.h" -#endif - #if defined(_WIN32) #include #include diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index 9064a7afb..c0ae8817b 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -30,7 +30,6 @@ #include #endif - #include 
"harness/mt19937.h" #include "harness/testHarness.h" #include "harness/typeWrappers.h" @@ -41,8 +40,6 @@ #include "conversions_data_info.h" -// typedef void (*Convert)( void *dest, void *src, size_t ); - #define kVectorSizeCount 6 #define kMaxVectorSize 16 #define kPageSize 4096 @@ -51,11 +48,6 @@ #define EMBEDDED_REDUCTION_FACTOR 16 #define PERF_LOOP_COUNT 100 - -// extern Convert gConversions[kTypeCount][kTypeCount]; // [dest -// format][source format] extern Convert -// gSaturatedConversions[kTypeCount][kTypeCount]; // [dest format][source -// format] extern const char *gTypeNames[ kTypeCount ]; extern const char *gRoundingModeNames[ kRoundingModeCount ]; // { "", "_rte", "_rtp", "_rtn", "_rtz" } extern const char *gSaturationNames[ kSaturationModeCount ]; // { "", "_sat" } @@ -109,7 +101,6 @@ extern int vectorSizes[]; extern size_t gComputeDevices; extern uint32_t gDeviceFrequency; - namespace conv_test { cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, @@ -130,7 +121,6 @@ void *FlushToZero(void); void UnFlushToZero(void *); } - struct CalcRefValsBase { virtual ~CalcRefValsBase() = default; @@ -145,14 +135,12 @@ struct CalcRefValsBase cl_int result; }; - template struct CalcRefValsPat : CalcRefValsBase { int check_result(void *, uint32_t, int) override; }; - struct WriteInputBufferInfo { WriteInputBufferInfo() @@ -173,14 +161,12 @@ struct WriteInputBufferInfo std::vector> calcInfo; }; - // Must be aligned with Type enums! using TypeIter = std::tuple; constexpr bool isTypeFp[] = { 0, 0, 0, 0, 0, 0, 1, 1, 0, 0 }; - // Helper test fixture for constructing OpenCL objects used in testing // a variety of simple command-buffer enqueue scenarios. 
struct ConversionsTest @@ -188,10 +174,10 @@ struct ConversionsTest ConversionsTest(cl_device_id device, cl_context context, cl_command_queue queue); - virtual cl_int SetUp(int elements); + cl_int SetUp(int elements); // Test body returning an OpenCL error code - virtual cl_int Run(); + cl_int Run(); template int DoTest(Type outType, Type inType, SaturationMode sat, @@ -210,7 +196,6 @@ struct ConversionsTest TypeIter typeIterator; }; - struct CustomConversionsTest : ConversionsTest { CustomConversionsTest(cl_device_id device, cl_context context, @@ -218,10 +203,9 @@ struct CustomConversionsTest : ConversionsTest : ConversionsTest(device, context, queue) {} - cl_int Run() override; + cl_int Run(); }; - template int MakeAndRunTest(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) @@ -234,7 +218,6 @@ int MakeAndRunTest(cl_device_id device, cl_context context, return test_fixture.Run(); } - struct TestType { template bool testType(Type in) @@ -256,7 +239,6 @@ struct TestType } }; - // Helper structures to iterate over all tuple attributes of different types struct IterOverTypes : public TestType { @@ -268,7 +250,6 @@ struct IterOverTypes : public TestType void Run() { for_each_out_elem(typeIter); } protected: - //////////////////////////////////////////////////////////////////////////////////////// template void iterate_out_type(const OutType &t) @@ -278,8 +259,6 @@ struct IterOverTypes : public TestType inType = (Type)0; } - //////////////////////////////////////////////////////////////////////////////////////// - template void iterate_in_type(const InType &t) @@ -294,16 +273,12 @@ struct IterOverTypes : public TestType inType = (Type)(inType + 1); } - //////////////////////////////////////////////////////////////////////////////////////// - template inline typename std::enable_if::type for_each_out_elem( const std::tuple &) // Unused arguments are given no names. 
{} - //////////////////////////////////////////////////////////////////////////////////////// - template inline typename std::enable_if < Out::type for_each_out_elem(const std::tuple &t) @@ -312,8 +287,6 @@ struct IterOverTypes : public TestType for_each_out_elem(t); } - //////////////////////////////////////////////////////////////////////////////////////// - template inline typename std::enable_if::type @@ -321,8 +294,6 @@ struct IterOverTypes : public TestType const std::tuple &) // Unused arguments are given no names. {} - //////////////////////////////////////////////////////////////////////////////////////// - template inline typename std::enable_if < In::type @@ -332,8 +303,6 @@ struct IterOverTypes : public TestType for_each_in_elem(t); } - //////////////////////////////////////////////////////////////////////////////////////// - protected: Type inType; Type outType; @@ -355,7 +324,6 @@ struct IterOverSelectedTypes : public TestType void Run() { for_each_out_elem(typeIter); } protected: - //////////////////////////////////////////////////////////////////////////////////////// template void iterate_out_type(const OutType &t) @@ -363,8 +331,6 @@ struct IterOverSelectedTypes : public TestType for_each_in_elem<0, Out, OutType>(typeIter); } - //////////////////////////////////////////////////////////////////////////////////////// - template void iterate_in_type(const InType &t) @@ -378,15 +344,11 @@ struct IterOverSelectedTypes : public TestType } } - //////////////////////////////////////////////////////////////////////////////////////// - template inline typename std::enable_if::type for_each_out_elem(const std::tuple &) {} - //////////////////////////////////////////////////////////////////////////////////////// - template inline typename std::enable_if < Out::type for_each_out_elem(const std::tuple &t) @@ -395,16 +357,12 @@ struct IterOverSelectedTypes : public TestType for_each_out_elem(t); } - 
//////////////////////////////////////////////////////////////////////////////////////// - template inline typename std::enable_if::type for_each_in_elem(const std::tuple &) {} - //////////////////////////////////////////////////////////////////////////////////////// - template inline typename std::enable_if < In::type @@ -414,8 +372,6 @@ struct IterOverSelectedTypes : public TestType for_each_in_elem(t); } - //////////////////////////////////////////////////////////////////////////////////////// - protected: Type inType; Type outType; diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index a4e9c9689..81e01e46a 100644 --- a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -71,7 +71,7 @@ struct DataInitInfo struct DataInitBase : public DataInitInfo { - DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {} + explicit DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {} virtual void conv_array(void *out, void *in, size_t n) {} virtual void conv_array_sat(void *out, void *in, size_t n) {} virtual void init(const cl_uint &, const cl_uint &) {} @@ -81,8 +81,7 @@ struct DataInitBase : public DataInitInfo template struct DataInfoSpec : public DataInitBase { - - DataInfoSpec(const DataInitInfo &agg); + explicit DataInfoSpec(const DataInitInfo &agg); // helpers float round_to_int(float f); diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 08fcdb473..a8be2098d 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/rounding_mode.h" #include "harness/ThreadPool.h" #include "harness/testHarness.h" #include "harness/parseParameters.h" @@ -54,10 +53,8 @@ #include "Sleep.h" #include "basic_test_conversions.h" -#include -#include - -#include "harness/mt19937.h" +#include +#include #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) #include "fplib.h" From 70838ce82bb70ae3ae8f2f0e889612493661a6d2 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 9 Jun 2023 09:39:18 +0200 Subject: [PATCH 4/8] More separators removed --- .../conversions/conversions_data_info.h | 30 +------------------ 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index 81e01e46a..d1f42ce36 100644 --- a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -30,7 +30,6 @@ extern roundingMode qcom_rm; #include "harness/mt19937.h" #include "harness/rounding_mode.h" -#include "harness/typeWrappers.h" #include @@ -51,7 +50,6 @@ typedef enum kSaturationModeCount } SaturationMode; - struct DataInitInfo { cl_ulong start; @@ -68,7 +66,6 @@ struct DataInitInfo static std::vector specialValuesDouble; }; - struct DataInitBase : public DataInitInfo { explicit DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {} @@ -77,7 +74,6 @@ struct DataInitBase : public DataInitInfo virtual void init(const cl_uint &, const cl_uint &) {} }; - template struct DataInfoSpec : public DataInitBase { @@ -101,30 +97,26 @@ struct DataInfoSpec : public DataInitBase std::vector mdv; - //////////////////////////////////////////////////////////////////////////// void conv_array(void *out, void *in, size_t n) override { for (size_t i = 0; i < n; i++) conv(&((OutType *)out)[i], &((InType *)in)[i]); } - //////////////////////////////////////////////////////////////////////////// void conv_array_sat(void *out, void *in, size_t n) 
override { for (size_t i = 0; i < n; i++) conv_sat(&((OutType *)out)[i], &((InType *)in)[i]); } - //////////////////////////////////////////////////////////////////////////// void init(const cl_uint &, const cl_uint &) override; InType clamp(const InType &); - //////////////////////////////////////////////////////////////////////////// inline float fclamp(float lo, float v, float hi) { v = v < lo ? lo : v; return v < hi ? v : hi; } - //////////////////////////////////////////////////////////////////////////// + inline double dclamp(double lo, double v, double hi) { v = v < lo ? lo : v; @@ -132,8 +124,6 @@ struct DataInfoSpec : public DataInitBase } }; -//////////////////////////////////////////////////////////////////////////////////////// - template DataInfoSpec::DataInfoSpec( const DataInitInfo &agg) @@ -262,8 +252,6 @@ DataInfoSpec::DataInfoSpec( // clang-format on } -//////////////////////////////////////////////////////////////////////////////////////// - template float DataInfoSpec::round_to_int(float f) { @@ -293,8 +281,6 @@ float DataInfoSpec::round_to_int(float f) return f; } -//////////////////////////////////////////////////////////////////////////////////////// - template long long DataInfoSpec::round_to_int_and_clamp(double f) @@ -328,8 +314,6 @@ DataInfoSpec::round_to_int_and_clamp(double f) return (long long)f; } -//////////////////////////////////////////////////////////////////////////////////////// - template OutType DataInfoSpec::absolute(const OutType &x) { @@ -348,8 +332,6 @@ OutType DataInfoSpec::absolute(const OutType &x) return u.f; } -//////////////////////////////////////////////////////////////////////////////////////// - template void DataInfoSpec::conv(OutType *out, InType *in) { @@ -495,13 +477,9 @@ void DataInfoSpec::conv(OutType *out, InType *in) } } -//////////////////////////////////////////////////////////////////////////////////////// - #define CLAMP(_lo, _x, _hi) \ ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? 
(_hi) : (_x))) -//////////////////////////////////////////////////////////////////////////////////////// - template void DataInfoSpec::conv_sat(OutType *out, InType *in) @@ -621,8 +599,6 @@ void DataInfoSpec::conv_sat(OutType *out, } } -//////////////////////////////////////////////////////////////////////////////////////// - template void DataInfoSpec::init(const cl_uint &job_id, const cl_uint &thread_id) @@ -785,8 +761,6 @@ void DataInfoSpec::init(const cl_uint &job_id, } } -//////////////////////////////////////////////////////////////////////////////////////// - template InType DataInfoSpec::clamp(const InType &in) { @@ -806,6 +780,4 @@ InType DataInfoSpec::clamp(const InType &in) return in; } -//////////////////////////////////////////////////////////////////////////////////////// - #endif /* CONVERSIONS_DATA_INFO_H */ From 122a08b26f92a5be06a60951239e83514abef173 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 9 Jun 2023 09:58:21 +0200 Subject: [PATCH 5/8] Fixed clang format --- test_conformance/conversions/basic_test_conversions.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index c0ae8817b..23f959532 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -250,7 +250,6 @@ struct IterOverTypes : public TestType void Run() { for_each_out_elem(typeIter); } protected: - template void iterate_out_type(const OutType &t) { @@ -324,7 +323,6 @@ struct IterOverSelectedTypes : public TestType void Run() { for_each_out_elem(typeIter); } protected: - template void iterate_out_type(const OutType &t) { From 616ef769ad558a2679111d3f38c49edc51aec562 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 15 Jun 2023 10:13:04 +0200 Subject: [PATCH 6/8] Added multiple corrections related to code review --- .../conversions/basic_test_conversions.cpp | 24 ++++------- 
.../conversions/basic_test_conversions.h | 20 ++++----- .../conversions/conversions_data_info.h | 42 +++++++++---------- 3 files changed, 37 insertions(+), 49 deletions(-) diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 1fff7cb49..4571be3c3 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -283,10 +283,9 @@ static inline void Force64BitFPUPrecision(void) } -template -int CalcRefValsPat::check_result(void *test, - uint32_t count, - int vectorSize) +template +int CalcRefValsPat::check_result(void *test, uint32_t count, + int vectorSize) { const cl_uchar *a = (const cl_uchar *)gAllowZ; @@ -449,7 +448,7 @@ cl_int ConversionsTest::SetUp(int elements) } -template +template void ConversionsTest::TestTypesConversion(const Type &inType, const Type &outType, int &testNumber) { @@ -517,8 +516,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType, gMinVectorSize = 0; } - if ((error = DoTest(outType, inType, - sat, round))) + if ((error = DoTest(outType, inType, sat, round))) { vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " "FAILED ** \n", @@ -531,7 +529,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType, } -template +template int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, RoundingMode round) { @@ -539,15 +537,11 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, cl_ulong wall_start = mach_absolute_time(); #endif -#if 0 uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]); -#else cl_uint threads = GetThreadCount(); - uint64_t lastCase = 1000000ULL; -#endif DataInitInfo info = { 0, 0, outType, inType, sat, round, threads }; - DataInfoSpec init_info(info); + DataInfoSpec init_info(info); WriteInputBufferInfo writeInputBufferInfo; int vectorSize; int error = 0; @@ -570,7 +564,7 @@ int ConversionsTest::DoTest(Type outType, Type 
inType, SaturationMode sat, for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) { writeInputBufferInfo.calcInfo[vectorSize].reset( - new CalcRefValsPat()); + new CalcRefValsPat()); writeInputBufferInfo.calcInfo[vectorSize]->program = conv_test::MakeProgram( outType, inType, sat, round, vectorSize, @@ -604,11 +598,9 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, init_info.round = round = kRoundTowardZero; } -#if 0 // Figure out how many elements are in a work block // we handle 64-bit types a bit differently. if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL; -#endif if (!gWimpyMode && gIsEmbedded) step = blockCount * EMBEDDED_REDUCTION_FACTOR; diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index 23f959532..cf0e2c66a 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -135,7 +135,7 @@ struct CalcRefValsBase cl_int result; }; -template +template struct CalcRefValsPat : CalcRefValsBase { int check_result(void *, uint32_t, int) override; @@ -165,12 +165,12 @@ struct WriteInputBufferInfo using TypeIter = std::tuple; -constexpr bool isTypeFp[] = { 0, 0, 0, 0, 0, 0, 1, 1, 0, 0 }; - // Helper test fixture for constructing OpenCL objects used in testing // a variety of simple command-buffer enqueue scenarios. 
struct ConversionsTest { + virtual ~ConversionsTest() = default; + ConversionsTest(cl_device_id device, cl_context context, cl_command_queue queue); @@ -179,11 +179,11 @@ struct ConversionsTest // Test body returning an OpenCL error code cl_int Run(); - template + template int DoTest(Type outType, Type inType, SaturationMode sat, RoundingMode round); - template + template void TestTypesConversion(const Type &inType, const Type &outType, int &tn); protected: @@ -210,7 +210,7 @@ template int MakeAndRunTest(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { - auto test_fixture = T(device, context, queue); + T test_fixture(device, context, queue); cl_int error = test_fixture.SetUp(num_elements); test_error_ret(error, "Error in test initialization", TEST_FAIL); @@ -267,8 +267,7 @@ struct IterOverTypes : public TestType if (!testType(outType)) vlog_error("Unexpected data type!\n"); // run the conversions - test.TestTypesConversion( - inType, outType, testNumber); + test.TestTypesConversion(inType, outType, testNumber); inType = (Type)(inType + 1); } @@ -336,9 +335,8 @@ struct IterOverSelectedTypes : public TestType if (testType(inType) && testType(outType)) { // run the conversions - test.TestTypesConversion(inType, outType, - testNumber); + test.TestTypesConversion(inType, outType, + testNumber); } } diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index d1f42ce36..b02773b1f 100644 --- a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -60,7 +60,6 @@ struct DataInitInfo RoundingMode round; cl_uint threads; - static std::vector specialValuesUInt; static std::vector specialValuesFloat; static std::vector specialValuesDouble; @@ -68,13 +67,15 @@ struct DataInitInfo struct DataInitBase : public DataInitInfo { + virtual ~DataInitBase() = default; + explicit DataInitBase(const DataInitInfo &agg): 
DataInitInfo(agg) {} virtual void conv_array(void *out, void *in, size_t n) {} virtual void conv_array_sat(void *out, void *in, size_t n) {} virtual void init(const cl_uint &, const cl_uint &) {} }; -template +template struct DataInfoSpec : public DataInitBase { explicit DataInfoSpec(const DataInitInfo &agg); @@ -124,9 +125,8 @@ struct DataInfoSpec : public DataInitBase } }; -template -DataInfoSpec::DataInfoSpec( - const DataInitInfo &agg) +template +DataInfoSpec::DataInfoSpec(const DataInitInfo &agg) : DataInitBase(agg), mdv(0) { if (std::is_same::value) @@ -252,8 +252,8 @@ DataInfoSpec::DataInfoSpec( // clang-format on } -template -float DataInfoSpec::round_to_int(float f) +template +float DataInfoSpec::round_to_int(float f) { static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23), -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) }; @@ -281,9 +281,8 @@ float DataInfoSpec::round_to_int(float f) return f; } -template -long long -DataInfoSpec::round_to_int_and_clamp(double f) +template +long long DataInfoSpec::round_to_int_and_clamp(double f) { static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; @@ -314,8 +313,8 @@ DataInfoSpec::round_to_int_and_clamp(double f) return (long long)f; } -template -OutType DataInfoSpec::absolute(const OutType &x) +template +OutType DataInfoSpec::absolute(const OutType &x) { union { cl_uint u; @@ -332,8 +331,8 @@ OutType DataInfoSpec::absolute(const OutType &x) return u.f; } -template -void DataInfoSpec::conv(OutType *out, InType *in) +template +void DataInfoSpec::conv(OutType *out, InType *in) { if (std::is_same::value) { @@ -480,9 +479,8 @@ void DataInfoSpec::conv(OutType *out, InType *in) #define CLAMP(_lo, _x, _hi) \ ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? 
(_hi) : (_x))) -template -void DataInfoSpec::conv_sat(OutType *out, - InType *in) +template +void DataInfoSpec::conv_sat(OutType *out, InType *in) { if (std::is_floating_point::value) { @@ -599,9 +597,9 @@ void DataInfoSpec::conv_sat(OutType *out, } } -template -void DataInfoSpec::init(const cl_uint &job_id, - const cl_uint &thread_id) +template +void DataInfoSpec::init(const cl_uint &job_id, + const cl_uint &thread_id) { uint64_t ulStart = start; void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType]; @@ -761,8 +759,8 @@ void DataInfoSpec::init(const cl_uint &job_id, } } -template -InType DataInfoSpec::clamp(const InType &in) +template +InType DataInfoSpec::clamp(const InType &in) { if (std::is_integral::value) { From ebea247d321193de2a7cf7aa63aa3fba97286773 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 11:46:13 +0200 Subject: [PATCH 7/8] Corrected missing implicit test lost after modernization corrections --- .../conversions/basic_test_conversions.cpp | 4 ++-- .../conversions/basic_test_conversions.h | 16 ++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 4571be3c3..ffdb948ac 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -450,12 +450,12 @@ cl_int ConversionsTest::SetUp(int elements) template void ConversionsTest::TestTypesConversion(const Type &inType, - const Type &outType, int &testNumber) + const Type &outType, int &testNumber, + int startMinVectorSize) { SaturationMode sat; RoundingMode round; int error; - int startMinVectorSize = gMinVectorSize; // skip longs on embedded if (!gHasLong diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index cf0e2c66a..aed0601be 100644 --- a/test_conformance/conversions/basic_test_conversions.h 
+++ b/test_conformance/conversions/basic_test_conversions.h @@ -184,7 +184,8 @@ struct ConversionsTest RoundingMode round); template - void TestTypesConversion(const Type &inType, const Type &outType, int &tn); + void TestTypesConversion(const Type &inType, const Type &outType, int &tn, + const int smvs); protected: cl_context context; @@ -244,7 +245,7 @@ struct IterOverTypes : public TestType { IterOverTypes(const TypeIter &typeIter, ConversionsTest &test) : inType((Type)0), outType((Type)0), typeIter(typeIter), test(test), - testNumber(-1) + testNumber(-1), startMinVectorSize(gMinVectorSize) {} void Run() { for_each_out_elem(typeIter); } @@ -267,7 +268,8 @@ struct IterOverTypes : public TestType if (!testType(outType)) vlog_error("Unexpected data type!\n"); // run the conversions - test.TestTypesConversion(inType, outType, testNumber); + test.TestTypesConversion(inType, outType, testNumber, + startMinVectorSize); inType = (Type)(inType + 1); } @@ -307,6 +309,7 @@ struct IterOverTypes : public TestType const TypeIter &typeIter; ConversionsTest &test; int testNumber; + int startMinVectorSize; }; @@ -316,7 +319,7 @@ struct IterOverSelectedTypes : public TestType IterOverSelectedTypes(const TypeIter &typeIter, ConversionsTest &test, const Type &in, const Type &out) : inType(in), outType(out), typeIter(typeIter), test(test), - testNumber(-1) + testNumber(-1), startMinVectorSize(gMinVectorSize) {} void Run() { for_each_out_elem(typeIter); } @@ -335,8 +338,8 @@ struct IterOverSelectedTypes : public TestType if (testType(inType) && testType(outType)) { // run the conversions - test.TestTypesConversion(inType, outType, - testNumber); + test.TestTypesConversion( + inType, outType, testNumber, startMinVectorSize); } } @@ -374,6 +377,7 @@ struct IterOverSelectedTypes : public TestType const TypeIter &typeIter; ConversionsTest &test; int testNumber; + int startMinVectorSize; }; From 3c3d5b23ea08418e3f8c68c6f6315a9748bceeba Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: 
Wed, 21 Jun 2023 12:38:45 +0200 Subject: [PATCH 8/8] Corrected single, selected test to limit number of unnecessary operations --- .../conversions/basic_test_conversions.cpp | 3 ++- .../conversions/basic_test_conversions.h | 17 +++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index ffdb948ac..43fb449bc 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -406,7 +406,8 @@ cl_int CustomConversionsTest::Run() gMinVectorSize = 0; } - IterOverSelectedTypes iter(typeIterator, *this, inType, outType); + IterOverSelectedTypes iter(typeIterator, *this, inType, outType, round, + sat); iter.Run(); diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index aed0601be..2314ee748 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -317,9 +317,11 @@ struct IterOverTypes : public TestType struct IterOverSelectedTypes : public TestType { IterOverSelectedTypes(const TypeIter &typeIter, ConversionsTest &test, - const Type &in, const Type &out) - : inType(in), outType(out), typeIter(typeIter), test(test), - testNumber(-1), startMinVectorSize(gMinVectorSize) + const Type in, const Type out, + const RoundingMode round, const SaturationMode sat) + : inType(in), outType(out), rounding(round), saturation(sat), + typeIter(typeIter), test(test), testNumber(-1), + startMinVectorSize(gMinVectorSize) {} void Run() { for_each_out_elem(typeIter); } @@ -337,9 +339,9 @@ struct IterOverSelectedTypes : public TestType { if (testType(inType) && testType(outType)) { - // run the conversions - test.TestTypesConversion( - inType, outType, testNumber, startMinVectorSize); + // run selected conversion + // testing of the result will happen 
afterwards + test.DoTest(outType, inType, saturation, rounding); } } @@ -374,6 +376,9 @@ struct IterOverSelectedTypes : public TestType protected: Type inType; Type outType; + RoundingMode rounding; + SaturationMode saturation; + const TypeIter &typeIter; ConversionsTest &test; int testNumber;