From 0e229b8f01afc9e16ca83234b656830c26f11215 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 17:42:57 +0200 Subject: [PATCH] Added cl_khr_fp16 extension support for test_fpmath from basic (#1718) * Added half and double support for fpmath test from basic (issue #142, basic) * Cosmetic corrections due to code review * Removed unnecessary casting * Added corrections due to code review * Tuning range of input generation to avoid hitting infinity * Moved string helpers procedures due to request from test_commonfns PR #1695 --- .../harness/stringHelpers.h | 0 test_conformance/basic/CMakeLists.txt | 2 +- test_conformance/basic/main.cpp | 37 +- test_conformance/basic/procs.h | 10 +- test_conformance/basic/test_astype.cpp | 7 +- test_conformance/basic/test_fpmath.cpp | 386 ++++++++++++++++++ test_conformance/basic/test_fpmath_float.cpp | 196 --------- 7 files changed, 427 insertions(+), 211 deletions(-) rename test_conformance/basic/utils.h => test_common/harness/stringHelpers.h (100%) create mode 100644 test_conformance/basic/test_fpmath.cpp delete mode 100644 test_conformance/basic/test_fpmath_float.cpp diff --git a/test_conformance/basic/utils.h b/test_common/harness/stringHelpers.h similarity index 100% rename from test_conformance/basic/utils.h rename to test_common/harness/stringHelpers.h diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index c07d32b66..c89a93cf0 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -2,7 +2,7 @@ set(MODULE_NAME BASIC) set(${MODULE_NAME}_SOURCES main.cpp - test_fpmath_float.cpp + test_fpmath.cpp test_intmath.cpp test_hiloeo.cpp test_local.cpp test_pointercast.cpp test_if.cpp test_loop.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 86c3cec35..24262dbf9 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,14 +22,15 @@ #include #include #include + +#include + #include "harness/testHarness.h" #include "procs.h" test_definition test_list[] = { ADD_TEST(hostptr), - ADD_TEST(fpmath_float), - ADD_TEST(fpmath_float2), - ADD_TEST(fpmath_float4), + ADD_TEST(fpmath), ADD_TEST(intmath_int), ADD_TEST(intmath_int2), ADD_TEST(intmath_int4), @@ -164,9 +165,35 @@ test_definition test_list[] = { }; const int test_num = ARRAY_SIZE( test_list ); +cl_half_rounding_mode halfRoundingMode = CL_HALF_RTE; + +test_status InitCL(cl_device_id device) +{ + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } + } + + return TEST_PASS; +} int main(int argc, const char *argv[]) { - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); } diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index c14340de3..9cbc373a3 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // + #include "harness/kernelHelpers.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" @@ -21,9 +22,8 @@ #include "harness/rounding_mode.h" extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp index 08a4cb85a..45669a7cb 100644 --- a/test_conformance/basic/test_astype.cpp +++ b/test_conformance/basic/test_astype.cpp @@ -14,6 +14,9 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/conversions.h" +#include "harness/stringHelpers.h" +#include "harness/typeWrappers.h" #include #include @@ -22,11 +25,7 @@ #include #include -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - #include "procs.h" -#include "utils.h" // clang-format off diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp new file mode 100644 index 000000000..6719e7281 --- /dev/null +++ b/test_conformance/basic/test_fpmath.cpp @@ -0,0 +1,386 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" +#include "harness/rounding_mode.h" +#include "harness/stringHelpers.h" + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "procs.h" + +static const char *fp_kernel_code = R"( +%s +__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) +{ + int tid = get_global_id(0); + + dst[tid] = srcA[tid] OP srcB[tid]; +})"; + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +template double toDouble(T val) +{ + if (std::is_same::value) + return HTF(val); + else + return val; +} + +bool isHalfNan(cl_half v) +{ + // Extract FP16 exponent and mantissa + uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = v & 0x3FF; + + // NaN test + return (h_exp == 0x1F && h_mant != 0); +} + +cl_half half_plus(cl_half a, cl_half b) +{ + return HFF(std::plus()(HTF(a), HTF(b))); +} + +cl_half half_minus(cl_half a, cl_half b) +{ + return HFF(std::minus()(HTF(a), HTF(b))); +} + +cl_half half_mult(cl_half a, cl_half b) +{ + return HFF(std::multiplies()(HTF(a), HTF(b))); +} + +template struct TestDef +{ + const char op; + std::function ref; + std::string type_str; + size_t vec_size; +}; + +template +int verify_fp(std::vector (&input)[2], std::vector &output, + const TestDef &test) +{ + auto &inA = input[0]; + auto &inB = input[1]; + for (int i = 0; i < output.size(); i++) + { + bool nan_test = false; + + T r = test.ref(inA[i], inB[i]); + + if (std::is_same::value) + nan_test = !(isHalfNan(r) && isHalfNan(output[i])); + + if (r != output[i] && nan_test) + { + log_error("FP math test for type: %s, vec size: %zu, failed at " + "index %d, %a '%c' %a, expected %a, get %a\n", + test.type_str.c_str(), test.vec_size, i, toDouble(inA[i]), + test.op, toDouble(inB[i]), toDouble(r), + toDouble(output[i])); + return -1; + } + } + + return 0; +} + +template void generate_random_inputs(std::vector (&input)[2]) +{ + RandomSeed seed(gRandomSeed); + + if (std::is_same::value) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else if (std::is_same::value) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float(-MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + seed)); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } +} + +struct TypesIterator +{ + using TypeIter = std::tuple; + + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems) + : context(context), queue(queue), fpConfigHalf(0), fpConfigFloat(0), + num_elements(num_elems) + { + // typeid().name one day + type2name[sizeof(cl_half)] = "half"; + type2name[sizeof(cl_float)] = "float"; + type2name[sizeof(cl_double)] = "double"; + + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + fpConfigFloat = get_default_rounding_mode(deviceID); + + if (fp16Support) + fpConfigHalf = + get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG); + + for_each_elem(it); + } + + template int test_fpmath(TestDef &test) + { + constexpr size_t vecSizes[] = { 1, 2, 4, 8, 16 }; + cl_int err = CL_SUCCESS; + + std::ostringstream sstr; + if (std::is_same::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (std::is_same::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string program_source = + str_sprintf(std::string(fp_kernel_code), sstr.str().c_str()); + + for (unsigned i = 0; i < ARRAY_SIZE(vecSizes); i++) + { + test.vec_size = vecSizes[i]; + + std::ostringstream vecNameStr; + vecNameStr << test.type_str; + if (test.vec_size != 1) vecNameStr << test.vec_size; + + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t length = sizeof(T) * num_elements * test.vec_size; + + bool isRTZ = false; + RoundingMode oldMode = kDefaultRoundingMode; + + + // If we only support rtz mode + if (std::is_same::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigHalf) + { + isRTZ = true; + oldMode = get_round(); + } + } + else if (std::is_same::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigFloat) + { + isRTZ = true; + oldMode = get_round(); + } + } + + std::vector inputs[]{ + std::vector(test.vec_size * num_elements), + std::vector(test.vec_size * num_elements) + }; + std::vector output = + std::vector(test.vec_size * num_elements); + + generate_random_inputs(inputs); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, + NULL, &err); + test_error(err, "clCreateBuffer failed."); + } + for (int i = 0; i < ARRAY_SIZE(inputs); i++) + { + err = + clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, + inputs[i].data(), 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed."); + } + + std::string build_options = "-DTYPE="; + build_options.append(vecNameStr.str()) + .append(" -DOP=") + .append(1, test.op); + + const char *ptr = program_source.c_str(); + err = + create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_fp", build_options.c_str()); + + test_error(err, "create_single_kernel_helper failed"); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) + { + err = + clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(err, "clSetKernelArgs failed."); + } + + size_t threads[] = { static_cast(num_elements) }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, + 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, + output.data(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed."); + + if (isRTZ) set_round(kRoundTowardZero, kfloat); + + err = verify_fp(inputs, output, test); + + if (isRTZ) set_round(oldMode, kfloat); + + test_error(err, "test verification failed"); + log_info("FP '%c' '%s' test passed\n", test.op, + vecNameStr.str().c_str()); + } + + return err; + } + + template int test_fpmath_common() + { + int err = TEST_PASS; + if (std::is_same::value) + { + TestDef tests[] = { { '+', half_plus, type2name[sizeof(T)] }, + { '-', half_minus, type2name[sizeof(T)] }, + { '*', half_mult, type2name[sizeof(T)] } }; + for (auto &test : tests) err |= test_fpmath(test); + } + else + { + TestDef tests[] = { + { '+', std::plus(), type2name[sizeof(T)] }, + { '-', std::minus(), type2name[sizeof(T)] }, + { '*', std::multiplies(), type2name[sizeof(T)] } + }; + for (auto &test : tests) err |= test_fpmath(test); + } + + return err; + } + + template bool skip_type() + { + if (std::is_same::value && !fp64Support) + return true; + else if (std::is_same::value && !fp16Support) + return true; + return false; + } + + template + void iterate_type(const Type &t) + { + bool doTest = !skip_type(); + + if (doTest) + { + if (test_fpmath_common()) + { + throw std::runtime_error("test_fpmath_common failed\n"); + } + } + } + + template + inline typename std::enable_if::type + for_each_elem( + const std::tuple &) // Unused arguments are given no names. + {} + + template + inline typename std::enable_if < Cnt::type + for_each_elem(const std::tuple &t) + { + iterate_type(std::get(t)); + for_each_elem(t); + } + +protected: + TypeIter it; + + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + int num_elements; + std::map type2name; +}; + +int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp deleted file mode 100644 index fced0f4ec..000000000 --- a/test_conformance/basic/test_fpmath_float.cpp +++ /dev/null @@ -1,196 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include -#include -#include "harness/rounding_mode.h" - -#include -#include -#include -#include - -#include "procs.h" - -struct TestDef -{ - const char op; - std::function ref; -}; - -static const char *fp_kernel_code = R"( -__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) -{ - int tid = get_global_id(0); - - dst[tid] = srcA[tid] OP srcB[tid]; -})"; - -static int verify_fp(std::vector (&input)[2], std::vector &output, - const TestDef &test) -{ - - auto &inA = input[0]; - auto &inB = input[1]; - for (int i = 0; i < output.size(); i++) - { - float r = test.ref(inA[i], inB[i]); - if (r != output[i]) - { - log_error("FP '%c' float test failed\n", test.op); - return -1; - } - } - - log_info("FP '%c' float test passed\n", test.op); - return 0; -} - - -void generate_random_inputs(std::vector (&input)[2]) -{ - RandomSeed seed(gRandomSeed); - - auto random_generator = [&seed]() { - return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), - MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); - }; - - for (auto &v : input) - { - std::generate(v.begin(), v.end(), random_generator); - } -} - -template -int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, - int num_elements, const std::string type_str, - const TestDef &test) -{ - clMemWrapper streams[3]; - clProgramWrapper program; - clKernelWrapper kernel; - - int err; - - size_t length = sizeof(cl_float) * num_elements * N; - - int isRTZ = 0; - RoundingMode oldMode = kDefaultRoundingMode; - - // If we only support rtz mode - if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device)) - { - isRTZ = 1; - oldMode = get_round(); - } - - - std::vector inputs[]{ std::vector(N * num_elements), - std::vector(N * num_elements) }; - std::vector output = std::vector(N * num_elements); - - generate_random_inputs(inputs); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err); - test_error(err, "clCreateBuffer failed."); - } - for (int i = 0; i < ARRAY_SIZE(inputs); i++) - { - err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, - inputs[i].data(), 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed."); - } - - std::string build_options = "-DTYPE="; - build_options.append(type_str).append(" -DOP=").append(1, test.op); - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &fp_kernel_code, "test_fp", - build_options.c_str()); - - test_error(err, "create_single_kernel_helper failed"); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - err = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); - test_error(err, "clSetKernelArgs failed."); - } - - size_t threads[] = { static_cast(num_elements) }; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, - NULL); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, - output.data(), 0, NULL, NULL); - test_error(err, "clEnqueueReadBuffer failed."); - - if (isRTZ) set_round(kRoundTowardZero, kfloat); - - err = verify_fp(inputs, output, test); - - if (isRTZ) set_round(oldMode, kfloat); - - return err; -} - - -template -int test_fpmath_common(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const std::string type_str) -{ - TestDef tests[] = { { '+', std::plus() }, - { '-', std::minus() }, - { '*', std::multiplies() } }; - int err = TEST_PASS; - - for (const auto &test : tests) - { - err |= test_fpmath(device, context, queue, num_elements, type_str, - test); - } - - return err; -} - -int test_fpmath_float(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<1>(device, context, queue, num_elements, "float"); -} - -int test_fpmath_float2(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<2>(device, context, queue, num_elements, - "float2"); -} - -int test_fpmath_float4(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<4>(device, context, queue, num_elements, - "float4"); -}