From f330d79108249c9a23b0c7a1d96497f99dd904d4 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 4 Apr 2023 13:24:40 +0200 Subject: [PATCH 01/11] Added cl_khr_fp16 extension support for commonfns test (issue #142, commonfns) --- test_common/harness/kernelHelpers.cpp | 23 +- test_common/harness/kernelHelpers.h | 4 +- test_conformance/commonfns/CMakeLists.txt | 14 +- test_conformance/commonfns/main.cpp | 74 +- test_conformance/commonfns/procs.h | 9 +- test_conformance/commonfns/test_base.h | 282 ++++++++ test_conformance/commonfns/test_binary_fn.cpp | 418 +++++++---- test_conformance/commonfns/test_clamp.cpp | 446 ++++++------ test_conformance/commonfns/test_degrees.cpp | 470 ------------ test_conformance/commonfns/test_fmax.cpp | 233 ------ test_conformance/commonfns/test_fmaxf.cpp | 244 ------- test_conformance/commonfns/test_fmin.cpp | 238 ------ test_conformance/commonfns/test_fminf.cpp | 236 ------ test_conformance/commonfns/test_max.cpp | 60 -- test_conformance/commonfns/test_maxf.cpp | 64 -- test_conformance/commonfns/test_min.cpp | 56 -- test_conformance/commonfns/test_minf.cpp | 70 -- test_conformance/commonfns/test_mix.cpp | 396 ++++++---- test_conformance/commonfns/test_radians.cpp | 468 ------------ test_conformance/commonfns/test_sign.cpp | 437 ----------- .../commonfns/test_smoothstep.cpp | 527 ++++++++------ .../commonfns/test_smoothstepf.cpp | 259 ------- test_conformance/commonfns/test_step.cpp | 680 ++++++------------ test_conformance/commonfns/test_stepf.cpp | 546 -------------- test_conformance/commonfns/test_unary_fn.cpp | 427 +++++++++++ 25 files changed, 2065 insertions(+), 4616 deletions(-) create mode 100644 test_conformance/commonfns/test_base.h delete mode 100644 test_conformance/commonfns/test_degrees.cpp delete mode 100644 test_conformance/commonfns/test_fmax.cpp delete mode 100644 test_conformance/commonfns/test_fmaxf.cpp delete mode 100644 test_conformance/commonfns/test_fmin.cpp delete mode 100644 test_conformance/commonfns/test_fminf.cpp delete mode 100644 test_conformance/commonfns/test_max.cpp delete mode 100644 test_conformance/commonfns/test_maxf.cpp delete mode 100644 test_conformance/commonfns/test_min.cpp delete mode 100644 test_conformance/commonfns/test_minf.cpp delete mode 100644 test_conformance/commonfns/test_radians.cpp delete mode 100644 test_conformance/commonfns/test_sign.cpp delete mode 100644 test_conformance/commonfns/test_smoothstepf.cpp delete mode 100644 test_conformance/commonfns/test_stepf.cpp create mode 100644 test_conformance/commonfns/test_unary_fn.cpp diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 13ebcbc96..633b05e5c 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -1511,22 +1511,33 @@ size_t get_min_alignment(cl_context context) return align_size; } -cl_device_fp_config get_default_rounding_mode(cl_device_id device) +cl_device_fp_config get_default_rounding_mode(cl_device_id device, + const cl_uint ¶m) { + if (param == CL_DEVICE_DOUBLE_FP_CONFIG) + test_error_ret( + -1, + "FAILURE: CL_DEVICE_DOUBLE_FP_CONFIG not supported by this routine", + 0); + char profileStr[128] = ""; cl_device_fp_config single = 0; - int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, - sizeof(single), &single, NULL); + int error = clGetDeviceInfo(device, param, sizeof(single), &single, NULL); if (error) - test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", - 0); + { + std::string message = std::string("Unable to get device ") + + std::string(param == CL_DEVICE_HALF_FP_CONFIG + ? "CL_DEVICE_HALF_FP_CONFIG" + : "CL_DEVICE_SINGLE_FP_CONFIG"); + test_error_ret(error, message.c_str(), 0); + } if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST; if (0 == (single & CL_FP_ROUND_TO_ZERO)) test_error_ret(-1, "FAILURE: device must support either " - "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", + "CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST", 0); // Make sure we are an embedded device before allowing a pass diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h index 4d8f2a8fa..62a07e49b 100644 --- a/test_common/harness/kernelHelpers.h +++ b/test_common/harness/kernelHelpers.h @@ -159,7 +159,9 @@ size_t get_min_alignment(cl_context context); /* Helper to obtain the default rounding mode for single precision computation. * (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */ -cl_device_fp_config get_default_rounding_mode(cl_device_id device); +cl_device_fp_config +get_default_rounding_mode(cl_device_id device, + const cl_uint ¶m = CL_DEVICE_SINGLE_FP_CONFIG); #define PASSIVE_REQUIRE_IMAGE_SUPPORT(device) \ if (checkForImageSupport(device)) \ diff --git a/test_conformance/commonfns/CMakeLists.txt b/test_conformance/commonfns/CMakeLists.txt index 5aa29250d..bea20cf5e 100644 --- a/test_conformance/commonfns/CMakeLists.txt +++ b/test_conformance/commonfns/CMakeLists.txt @@ -3,22 +3,10 @@ set(MODULE_NAME COMMONFNS) set(${MODULE_NAME}_SOURCES main.cpp test_clamp.cpp - test_degrees.cpp - test_max.cpp - test_maxf.cpp - test_min.cpp - test_minf.cpp + test_unary_fn.cpp test_mix.cpp - test_radians.cpp test_step.cpp - test_stepf.cpp test_smoothstep.cpp - test_smoothstepf.cpp - test_sign.cpp - test_fmax.cpp - test_fmin.cpp - test_fmaxf.cpp - test_fminf.cpp test_binary_fn.cpp ) diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp index b8364d5a7..071de6bc6 100644 --- a/test_conformance/commonfns/main.cpp +++ b/test_conformance/commonfns/main.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,15 +13,20 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include #include #include "procs.h" +#include "test_base.h" +#include "harness/kernelHelpers.h" + +std::map BaseFunctionTest::type2name; +cl_half_rounding_mode BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount]; int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3}; +//-------------------------------------------------------------------------- static void initVecSizes() { int i; for(i = 0; i < kVectorSizeCount; ++i) { @@ -32,32 +37,57 @@ static void initVecSizes() { } } - +//-------------------------------------------------------------------------- test_definition test_list[] = { - ADD_TEST( clamp ), - ADD_TEST( degrees ), - ADD_TEST( fmax ), - ADD_TEST( fmaxf ), - ADD_TEST( fmin ), - ADD_TEST( fminf ), - ADD_TEST( max ), - ADD_TEST( maxf ), - ADD_TEST( min ), - ADD_TEST( minf ), - ADD_TEST( mix ), - ADD_TEST( radians ), - ADD_TEST( step ), - ADD_TEST( stepf ), - ADD_TEST( smoothstep ), - ADD_TEST( smoothstepf ), - ADD_TEST( sign ), + ADD_TEST(clamp), ADD_TEST(degrees), ADD_TEST(fmax), + ADD_TEST(fmaxf), ADD_TEST(fmin), ADD_TEST(fminf), + ADD_TEST(max), ADD_TEST(maxf), ADD_TEST(min), + ADD_TEST(minf), ADD_TEST(mix), ADD_TEST(mixf), + ADD_TEST(radians), ADD_TEST(step), ADD_TEST(stepf), + ADD_TEST(smoothstep), ADD_TEST(smoothstepf), ADD_TEST(sign), }; const int test_num = ARRAY_SIZE( test_list ); +//-------------------------------------------------------------------------- +test_status InitCL(cl_device_id device) +{ + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } + } + + return TEST_PASS; +} + +//-------------------------------------------------------------------------- int main(int argc, const char *argv[]) { initVecSizes(); - return runTestHarness(argc, argv, test_num, test_list, false, 0); + + if (BaseFunctionTest::type2name.empty()) + { + BaseFunctionTest::type2name[sizeof(half)] = "half"; + BaseFunctionTest::type2name[sizeof(float)] = "float"; + BaseFunctionTest::type2name[sizeof(double)] = "double"; + } + + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); } +//-------------------------------------------------------------------------- diff --git a/test_conformance/commonfns/procs.h b/test_conformance/commonfns/procs.h index dada94f97..c1115ee7c 100644 --- a/test_conformance/commonfns/procs.h +++ b/test_conformance/commonfns/procs.h @@ -37,6 +37,8 @@ extern int test_maxf(cl_device_id device, cl_context context, cl_command_ extern int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mixf(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_step(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); @@ -44,11 +46,4 @@ extern int test_smoothstep(cl_device_id device, cl_context context, cl_co extern int test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -typedef int (*binary_verify_float_fn)( float *x, float *y, float *out, int numElements, int vecSize ); -typedef int (*binary_verify_double_fn)( double *x, double *y, double *out, int numElements, int vecSize ); - -extern int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, - const char *fnName, bool vectorSecondParam, - binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn ); - diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h new file mode 100644 index 000000000..278e44d55 --- /dev/null +++ b/test_conformance/commonfns/test_base.h @@ -0,0 +1,282 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef _TEST_COMMONFNS_BASE_H +#define _TEST_COMMONFNS_BASE_H + +#include +#include +#include + +#include +#include + +#include "harness/deviceInfo.h" +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" + +//--------------------------------------------------------------------------/ + +template +using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const, + const int num, const int vs, const int vp); + +//--------------------------------------------------------------------------/ + +template +using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num); + +//--------------------------------------------------------------------------/ + +using half = cl_half; + +//-------------------------------------------------------------------------- + +struct BaseFunctionTest +{ + BaseFunctionTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elems, const char *fn, + bool vsp) + : device(device), context(context), queue(queue), num_elems(num_elems), + fnName(fn), vecParam(vsp) + {} + + // Test body returning an OpenCL error code + virtual cl_int Run() = 0; + + cl_device_id device; + cl_context context; + cl_command_queue queue; + + int num_elems; + std::string fnName; + bool vecParam; + + static std::map type2name; + static cl_half_rounding_mode halfRoundingMode; +}; + +//-------------------------------------------------------------------------- + +struct MinTest : BaseFunctionTest +{ + MinTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//-------------------------------------------------------------------------- + +struct MaxTest : BaseFunctionTest +{ + MaxTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//-------------------------------------------------------------------------- + +struct ClampTest : BaseFunctionTest +{ + ClampTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//-------------------------------------------------------------------------- + +struct DegreesTest : BaseFunctionTest +{ + DegreesTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//-------------------------------------------------------------------------- + +struct RadiansTest : BaseFunctionTest +{ + RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//-------------------------------------------------------------------------- + +struct SignTest : BaseFunctionTest +{ + SignTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//-------------------------------------------------------------------------- + +struct SmoothstepTest : BaseFunctionTest +{ + SmoothstepTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elems, const char *fn, + bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//-------------------------------------------------------------------------- + +struct StepTest : BaseFunctionTest +{ + StepTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//-------------------------------------------------------------------------- + +struct MixTest : BaseFunctionTest +{ + MixTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + +//--------------------------------------------------------------------------/ + +template +std::string string_format(const std::string &format, Args... args) +{ + int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + if (sformat <= 0) + throw std::runtime_error("string_format: string processing error."); + auto format_size = static_cast(sformat); + std::unique_ptr buffer(new char[format_size]); + std::snprintf(buffer.get(), format_size, format.c_str(), args...); + return std::string(buffer.get(), buffer.get() + format_size - 1); +} + +//-------------------------------------------------------------------------- +template float UlpFn(const T &val, const double &r) +{ + if (std::is_same::value) + { + return Ulp_Error_Half(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error_Double(val, r); + } + else + { + log_error("GeometricsFPTest::UlpError: unsupported data type\n"); + } + + return -1.f; // wrong val +} + +//-------------------------------------------------------------------------- +template inline double conv_to_dbl(const T &val) +{ + if (std::is_same::value) + return (double)cl_half_to_float(val); + else + return (double)val; +} + +//-------------------------------------------------------------------------- +template inline double conv_to_flt(const T &val) +{ + if (std::is_same::value) + return (float)cl_half_to_float(val); + else + return (float)val; +} + +//-------------------------------------------------------------------------- +template inline half conv_to_half(const T &val) +{ + if (std::is_floating_point::value) + return cl_half_from_float(val, BaseFunctionTest::halfRoundingMode); + return 0; +} + +//-------------------------------------------------------------------------- + +template bool isfinite_fp(const T &v) +{ + if (std::is_same::value) + { + // Extract FP16 exponent and mantissa + uint16_t h_exp = (((half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = ((half)v) & 0x3FF; + + // !Inf test + return !(h_exp == 0x1F && h_mant == 0); + } + else + { +#if !defined(_WIN32) + return std::isfinite(v); +#else + return isfinite(v); +#endif + } +} + +//-------------------------------------------------------------------------- + +template +int MakeAndRunTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *fn = "", bool vsp = false) +{ + auto test_fixture = T(device, context, queue, num_elements, fn, vsp); + + cl_int error = test_fixture.Run(); + test_error_ret(error, "Test Failed", TEST_FAIL); + + return TEST_PASS; +} + +#endif // _TEST_COMPARISONS_FP_H diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index b40bf1f65..1865623e8 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,15 +13,20 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include #include #include #include +#include + +#include "harness/deviceInfo.h" +#include "harness/typeWrappers.h" #include "procs.h" +#include "test_base.h" +//-------------------------------------------------------------------------- const char *binary_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s *dst)\n" @@ -49,216 +54,347 @@ const char *binary_fn_code_pattern_v3_scalar = " vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n" "}\n"; -int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, - const char *fnName, bool vectorSecondParam, - binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn ) +//-------------------------------------------------------------------------- +template +int test_binary_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems, + const std::string& fnName, bool vecSecParam, + VerifyFuncBinary verifyFn) { - cl_mem streams[6]; - cl_float *input_ptr[2], *output_ptr; - cl_double *input_ptr_double[2], *output_ptr_double=NULL; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i, j; + clMemWrapper streams[3]; + std::vector input_ptr[2], output_ptr; + + std::vector programs; + std::vector kernels; + int err, i, j; MTdata d; - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2); + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; - num_elements = n_elems * (1 << (kTotalVecCount-1)); + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); - int test_double = 0; - if(is_extension_available( device, "cl_khr_fp64" )) - { - log_info("Testing doubles.\n"); - test_double = 1; - } + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); - for( i = 0; i < 2; i++ ) - { - input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - } - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements); + for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements); + output_ptr.resize(num_elements); for( i = 0; i < 3; i++ ) { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, &err); + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); test_error( err, "clCreateBuffer failed"); } - if (test_double) - for( i = 3; i < 6; i++ ) + std::string pragma_str; + d = init_genrand(gRandomSeed); + if (std::is_same::value) + { + for (j = 0; j < num_elements; j++) { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, &err); - test_error(err, "clCreateBuffer failed"); + input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d); + input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d); } - - d = init_genrand( gRandomSeed ); - for( j = 0; j < num_elements; j++ ) + } + else if (std::is_same::value) { - input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d); - input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d); - if (test_double) + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (j = 0; j < num_elements; j++) { - input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d); - input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d); } } - free_mtdata(d); d = NULL; - - for( i = 0; i < 2; i++ ) + else if (std::is_same::value) { - err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL ); - test_error( err, "Unable to write input buffer" ); - - if (test_double) + const float fval = 0x20000000; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) { - err = clEnqueueWriteBuffer( queue, streams[ 3 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL ); - test_error( err, "Unable to write input buffer" ); + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); } } + free_mtdata(d); + d = NULL; - for( i = 0; i < kTotalVecCount; i++ ) + for (i = 0; i < 2; i++) { - char programSrc[ 10240 ]; - char vecSizeNames[][ 3 ] = { "", "2", "4", "8", "16", "3" }; + err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, + sizeof(T) * num_elements, + &input_ptr[i].front(), 0, NULL, NULL); + test_error(err, "Unable to write input buffer"); + } - if(i >= kVectorSizeCount) { - // do vec3 print + char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; - if(vectorSecondParam) { - sprintf( programSrc,binary_fn_code_pattern_v3, "", "float", "float", "float", fnName ); - } else { - sprintf( programSrc,binary_fn_code_pattern_v3_scalar, "", "float", "float", "float", fnName ); + for (i = 0; i < kTotalVecCount; i++) + { + std::string kernelSource; + if (i >= kVectorSizeCount) + { + if (vecSecParam) + { + std::string str = binary_fn_code_pattern_v3; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); + } + else + { + std::string str = binary_fn_code_pattern_v3_scalar; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } - } else { - // do regular - sprintf( programSrc, binary_fn_code_pattern, "", "float", vecSizeNames[ i ], "float", vectorSecondParam ? vecSizeNames[ i ] : "", "float", vecSizeNames[ i ], fnName ); } - const char *ptr = programSrc; - err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &ptr, "test_fn" ); - test_error( err, "Unable to create kernel" ); - - if (test_double) + else { - if(i >= kVectorSizeCount) { - if(vectorSecondParam) { - sprintf( programSrc, binary_fn_code_pattern_v3, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", - "double", "double", "double", fnName ); - } else { - - sprintf( programSrc, binary_fn_code_pattern_v3_scalar, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", - "double", "double", "double", fnName ); - } - } else { - sprintf( programSrc, binary_fn_code_pattern, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", - "double", vecSizeNames[ i ], "double", vectorSecondParam ? vecSizeNames[ i ] : "", "double", vecSizeNames[ i ], fnName ); - } - ptr = programSrc; - err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &ptr, "test_fn" ); - test_error( err, "Unable to create kernel" ); + // do regular + std::string str = binary_fn_code_pattern; + kernelSource = string_format( + str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], + tname.c_str(), vecSecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i], fnName.c_str()); } - } + const char* programPtr = kernelSource.c_str(); + err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char**)&programPtr, "test_fn"); + test_error(err, "Unable to create kernel"); - for( i = 0; i < kTotalVecCount; i++ ) - { for( j = 0; j < 3; j++ ) { - err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] ); + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); test_error( err, "Unable to set kernel argument" ); } - threads[0] = (size_t)n_elems; + size_t threads = (size_t)n_elems; - err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); test_error( err, "Unable to execute kernel" ); - err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); + err = clEnqueueReadBuffer(queue, streams[2], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); test_error( err, "Unable to read results" ); - - - if( floatVerifyFn( input_ptr[0], input_ptr[1], output_ptr, n_elems, ((g_arrVecSizes[i])) ) ) + if (verifyFn((T*)&input_ptr[0].front(), (T*)&input_ptr[1].front(), + &output_ptr[0], n_elems, g_arrVecSizes[i], + vecSecParam ? 1 : 0)) { - log_error(" float%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float"); + log_error("%s %s%d%s test failed\n", fnName.c_str(), tname.c_str(), + ((g_arrVecSizes[i])), + vecSecParam ? "" : std::string(", " + tname).c_str()); err = -1; } else { - log_info(" float%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float"); + log_info("%s %s%d%s test passed\n", fnName.c_str(), tname.c_str(), + ((g_arrVecSizes[i])), + vecSecParam ? "" : std::string(", " + tname).c_str()); err = 0; } if (err) break; } + return err; +} + +namespace { - if (test_double) +//-------------------------------------------------------------------------- +template +int max_verify(const T* const x, const T* const y, const T* const out, + int numElements, int vecSize, int vecParam) +{ + for (int i = 0; i < numElements; i++) { - for( i = 0; i < kTotalVecCount; i++ ) + for (int j = 0; j < vecSize; j++) { - for( j = 0; j < 3; j++ ) + int k = i * vecSize + j; + int l = (k * vecParam + i * (1 - vecParam)); + T v = (conv_to_dbl(x[k]) < conv_to_dbl(y[l])) ? y[l] : x[k]; + if (v != out[k]) { - err = clSetKernelArg( kernel[ kTotalVecCount + i ], j, sizeof( streams[ 3 + j ] ), &streams[ 3 + j ] ); - test_error( err, "Unable to set kernel argument" ); + if (std::is_same::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + return -1; } + } + } + return 0; +} - threads[0] = (size_t)n_elems; - - err = clEnqueueNDRangeKernel( queue, kernel[kTotalVecCount + i], 1, NULL, threads, NULL, 0, NULL, NULL ); - test_error( err, "Unable to execute kernel" ); - - err = clEnqueueReadBuffer( queue, streams[5], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL ); - test_error( err, "Unable to read results" ); - - if( doubleVerifyFn( input_ptr_double[0], input_ptr_double[1], output_ptr_double, n_elems, ((g_arrVecSizes[i])))) - { - log_error(" double%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double"); - err = -1; - } - else +//-------------------------------------------------------------------------- +template +int min_verify(const T* const x, const T* const y, const T* const out, + int numElements, int vecSize, int vecParam) +{ + for (int i = 0; i < numElements; i++) + { + for (int j = 0; j < vecSize; j++) + { + int k = i * vecSize + j; + int l = (k * vecParam + i * (1 - vecParam)); + T v = (conv_to_dbl(x[k]) > conv_to_dbl(y[l])) ? y[l] : x[k]; + if (v != out[k]) { - log_info(" double%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double"); - err = 0; + if (std::is_same::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + return -1; } - - if (err) - break; } } + return 0; +} +} - for( i = 0; i < ((test_double) ? 6 : 3); i++ ) +//-------------------------------------------------------------------------- +cl_int MaxTest::Run() +{ + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) { - clReleaseMemObject(streams[i]); + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + max_verify); + test_error(error, "MaxTest::Run failed"); } - for (i=0; i < ((test_double) ? kTotalVecCount * 2 : kTotalVecCount) ; i++) + + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, max_verify); + test_error(error, "MaxTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) { - clReleaseKernel(kernel[i]); - clReleaseProgram(program[i]); + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + max_verify); + test_error(error, "MaxTest::Run failed"); } - free(input_ptr[0]); - free(input_ptr[1]); - free(output_ptr); - free(program); - free(kernel); - if (test_double) + return error; +} + +//-------------------------------------------------------------------------- +cl_int MinTest::Run() +{ + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) { - free(input_ptr_double[0]); - free(input_ptr_double[1]); - free(output_ptr_double); + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + min_verify); + test_error(error, "MinTest::Run failed"); } - return err; + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, min_verify); + test_error(error, "MinTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + min_verify); + test_error(error, "MinTest::Run failed"); + } + + return error; +} + +//-------------------------------------------------------------------------- +int test_min(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "min", + true); +} + +//-------------------------------------------------------------------------- +int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "min", + false); +} + +//-------------------------------------------------------------------------- + +int test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "fmin", + true); +} + +//-------------------------------------------------------------------------- + +int test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "fmin", + false); +} + +//-------------------------------------------------------------------------- + +int test_max(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "max", + true); } +//-------------------------------------------------------------------------- + +int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "max", + false); +} + +//-------------------------------------------------------------------------- + +int test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "fmax", + true); +} + +//-------------------------------------------------------------------------- + +int test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "fmax", + false); +} +//-------------------------------------------------------------------------- diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp index bbb836459..2f66c44cb 100644 --- a/test_conformance/commonfns/test_clamp.cpp +++ b/test_conformance/commonfns/test_clamp.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,303 +13,319 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include #include #include #include +#include + +#include "harness/deviceInfo.h" +#include "harness/typeWrappers.h" #include "procs.h" +#include "test_base.h" + #ifndef M_PI -#define M_PI 3.14159265358979323846264338327950288 +#define M_PI 3.14159265358979323846264338327950288 #endif -#define CLAMP_KERNEL( type ) \ - const char *clamp_##type##_kernel_code = \ - EMIT_PRAGMA_DIRECTIVE \ - "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ - "{\n" \ - " int tid = get_global_id(0);\n" \ - "\n" \ - " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ - "}\n"; - -#define CLAMP_KERNEL_V( type, size) \ - const char *clamp_##type##size##_kernel_code = \ - EMIT_PRAGMA_DIRECTIVE \ - "__kernel void test_clamp(__global " #type #size " *x, __global " #type #size " *minval, __global " #type #size " *maxval, __global " #type #size " *dst)\n" \ - "{\n" \ - " int tid = get_global_id(0);\n" \ - "\n" \ - " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ - "}\n"; - -#define CLAMP_KERNEL_V3( type, size) \ - const char *clamp_##type##size##_kernel_code = \ - EMIT_PRAGMA_DIRECTIVE \ - "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ - "{\n" \ - " int tid = get_global_id(0);\n" \ - "\n" \ - " vstore3(clamp(vload3(tid, x), vload3(tid,minval), vload3(tid,maxval)), tid, dst);\n" \ - "}\n"; +//-------------------------------------------------------------------------- + +#define CLAMP_KERNEL(type) \ + const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type " *x, __global " #type \ + " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ + "}\n"; + +#define CLAMP_KERNEL_V(type, size) \ + const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type #size \ + " *x, __global " #type #size " *minval, __global " #type #size \ + " *maxval, __global " #type #size " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ + "}\n"; + +#define CLAMP_KERNEL_V3(type, size) \ + const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type " *x, __global " #type \ + " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " vstore3(clamp(vload3(tid, x), vload3(tid,minval), " \ + "vload3(tid,maxval)), tid, dst);\n" \ + "}\n"; + +//-------------------------------------------------------------------------- +#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" +CLAMP_KERNEL(half) +CLAMP_KERNEL_V(half, 2) +CLAMP_KERNEL_V(half, 4) +CLAMP_KERNEL_V(half, 8) +CLAMP_KERNEL_V(half, 16) +CLAMP_KERNEL_V3(half, 3) +#undef EMIT_PRAGMA_DIRECTIVE #define EMIT_PRAGMA_DIRECTIVE " " -CLAMP_KERNEL( float ) -CLAMP_KERNEL_V( float, 2 ) -CLAMP_KERNEL_V( float, 4 ) -CLAMP_KERNEL_V( float, 8 ) -CLAMP_KERNEL_V( float, 16 ) -CLAMP_KERNEL_V3( float, 3) +CLAMP_KERNEL(float) +CLAMP_KERNEL_V(float, 2) +CLAMP_KERNEL_V(float, 4) +CLAMP_KERNEL_V(float, 8) +CLAMP_KERNEL_V(float, 16) +CLAMP_KERNEL_V3(float, 3) #undef EMIT_PRAGMA_DIRECTIVE #define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" -CLAMP_KERNEL( double ) -CLAMP_KERNEL_V( double, 2 ) -CLAMP_KERNEL_V( double, 4 ) -CLAMP_KERNEL_V( double, 8 ) -CLAMP_KERNEL_V( double, 16 ) -CLAMP_KERNEL_V3( double, 3 ) +CLAMP_KERNEL(double) +CLAMP_KERNEL_V(double, 2) +CLAMP_KERNEL_V(double, 4) +CLAMP_KERNEL_V(double, 8) +CLAMP_KERNEL_V(double, 16) +CLAMP_KERNEL_V3(double, 3) #undef EMIT_PRAGMA_DIRECTIVE -const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, clamp_float16_kernel_code, clamp_float3_kernel_code }; -const char *clamp_double_codes[] = { clamp_double_kernel_code, clamp_double2_kernel_code, clamp_double4_kernel_code, clamp_double8_kernel_code, clamp_double16_kernel_code, clamp_double3_kernel_code }; -static int verify_clamp(float *x, float *minval, float *maxval, float *outptr, int n) +const char *clamp_half_codes[] = { + clamp_half_kernel_code, clamp_half2_kernel_code, clamp_half4_kernel_code, + clamp_half8_kernel_code, clamp_half16_kernel_code, clamp_half3_kernel_code +}; +const char *clamp_float_codes[] = { + clamp_float_kernel_code, clamp_float2_kernel_code, + clamp_float4_kernel_code, clamp_float8_kernel_code, + clamp_float16_kernel_code, clamp_float3_kernel_code +}; +const char *clamp_double_codes[] = { + clamp_double_kernel_code, clamp_double2_kernel_code, + clamp_double4_kernel_code, clamp_double8_kernel_code, + clamp_double16_kernel_code, clamp_double3_kernel_code +}; + +namespace { + +//-------------------------------------------------------------------------- +template +int verify_clamp(const T *const x, const T *const minval, const T *const maxval, + const T *const outptr, int n) { - float t; - int i; - - for (i=0; i::value) { - t = fminf( fmaxf( x[ i ], minval[ i ] ), maxval[ i ] ); - if (t != outptr[i]) + float t; + for (int i = 0; i < n; i++) { - log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, x[i], minval[i], maxval[i], t, outptr[i] ); - return -1; + t = std::min( + std::max(cl_half_to_float(x[i]), cl_half_to_float(minval[i])), + cl_half_to_float(maxval[i])); + if (t != cl_half_to_float(outptr[i])) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", + i, cl_half_to_float(x[i]), cl_half_to_float(minval[i]), + cl_half_to_float(maxval[i]), t, + cl_half_to_float(outptr[i])); + return -1; + } } } - - return 0; -} - -static int verify_clamp_double(double *x, double *minval, double *maxval, double *outptr, int n) -{ - double t; - int i; - - for (i=0; i +int test_clamp_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) { - cl_mem streams[8]; - cl_float *input_ptr[3], *output_ptr; - cl_double *input_ptr_double[3], *output_ptr_double = NULL; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i, j; + clMemWrapper streams[4]; + std::vector input_ptr[3], output_ptr; + + std::vector programs; + std::vector kernels; + + int err, i, j; MTdata d; - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2); + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; - num_elements = n_elems * (1 << (kVectorSizeCount-1)); + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); - int test_double = 0; - if(is_extension_available( device, "cl_khr_fp64" )) { - log_info("Testing doubles.\n"); - test_double = 1; - } + int num_elements = n_elems * (1 << (kVectorSizeCount - 1)); + for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements); + output_ptr.resize(num_elements); - // why does this go from 0 to 2?? -- Oh, I see, there are four function - // arguments to the function, and 3 of them are inputs? - for( i = 0; i < 3; i++ ) + for (i = 0; i < 4; i++) { - input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements); + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); } - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements); - // why does this go from 0 to 3? - for( i = 0; i < 4; i++ ) + d = init_genrand(gRandomSeed); + if (std::is_same::value) { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) + for (j = 0; j < num_elements; j++) { - log_error("clCreateBuffer failed\n"); - return -1; + input_ptr[0][j] = get_random_float(-0x200000, 0x200000, d); + input_ptr[1][j] = get_random_float(-0x200000, 0x200000, d); + input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x200000, d); } } - if (test_double) - for( i = 4; i < 8; i++ ) + else if (std::is_same::value) + { + for (j = 0; j < num_elements; j++) { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d); } - - d = init_genrand( gRandomSeed ); - for( j = 0; j < num_elements; j++ ) + } + else if (std::is_same::value) { - input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d); - input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d); - input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x20000000, d); - - if (test_double) { - input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d); - input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d); - input_ptr_double[2][j] = get_random_double(input_ptr_double[1][j], 0x20000000, d); + const float fval = 0x200000; + for (j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[2][j] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[1][j]), fval, d)); } } - free_mtdata(d); d = NULL; + free_mtdata(d); - for( i = 0; i < 3; i++ ) + for (i = 0; i < 3; i++) { - err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL ); - test_error( err, "Unable to write input buffer" ); - - if (test_double) { - err = clEnqueueWriteBuffer( queue, streams[ 4 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL ); - test_error( err, "Unable to write input buffer" ); - } + err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, + sizeof(T) * num_elements, + &input_ptr[i].front(), 0, NULL, NULL); + test_error(err, "Unable to write input buffer"); } - for( i = 0; i < kTotalVecCount; i++ ) + for (i = 0; i < kTotalVecCount; i++) { - err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &clamp_float_codes[ i ], "test_clamp" ); - test_error( err, "Unable to create kernel" ); - - log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], i); - fflush(stdout); + if (std::is_same::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_float_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } + else if (std::is_same::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_double_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } + else if (std::is_same::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_half_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } - if (test_double) { - err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &clamp_double_codes[ i ], "test_clamp" ); - log_info("Just made a program for double, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], kTotalVecCount+i); + log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n", + tname.c_str(), i, g_arrVecSizes[i], i); fflush(stdout); - test_error( err, "Unable to create kernel" ); - } - } - for( i = 0; i < kTotalVecCount; i++ ) - { - for( j = 0; j < 4; j++ ) + for (j = 0; j < 4; j++) { - err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] ); - test_error( err, "Unable to set kernel argument" ); + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); + test_error(err, "Unable to set kernel argument"); } - threads[0] = (size_t)n_elems; + size_t threads = (size_t)n_elems; - err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); - test_error( err, "Unable to execute kernel" ); + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + test_error(err, "Unable to execute kernel"); - err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); - test_error( err, "Unable to read results" ); + err = clEnqueueReadBuffer(queue, streams[3], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + test_error(err, "Unable to read results"); - if (verify_clamp(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems*((g_arrVecSizes[i])))) + if (verify_clamp((T *)&input_ptr[0].front(), + (T *)&input_ptr[1].front(), + (T *)&input_ptr[2].front(), (T *)&output_ptr[0], + n_elems * ((g_arrVecSizes[i])))) { - log_error("CLAMP float%d test failed\n", ((g_arrVecSizes[i]))); + log_error("CLAMP %s%d test failed\n", tname.c_str(), + ((g_arrVecSizes[i]))); err = -1; } else { - log_info("CLAMP float%d test passed\n", ((g_arrVecSizes[i]))); + log_info("CLAMP %s%d test passed\n", tname.c_str(), + ((g_arrVecSizes[i]))); err = 0; } - - - if (err) - break; + if (err) break; } - // If the device supports double precision then test that - if (test_double) - { - for( ; i < 2*kTotalVecCount; i++ ) - { - - log_info("Start of test_double loop, i is %d\n", i); - for( j = 0; j < 4; j++ ) - { - err = clSetKernelArg( kernel[i], j, sizeof( streams[j+4] ), &streams[j+4] ); - test_error( err, "Unable to set kernel argument" ); - } - - threads[0] = (size_t)n_elems; - - err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); - test_error( err, "Unable to execute kernel" ); - - err = clEnqueueReadBuffer( queue, streams[7], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL ); - test_error( err, "Unable to read results" ); + return err; +} - if (verify_clamp_double(input_ptr_double[0], input_ptr_double[1], input_ptr_double[2], output_ptr_double, n_elems*g_arrVecSizes[(i-kTotalVecCount)])) - { - log_error("CLAMP double%d test failed\n", g_arrVecSizes[(i-kTotalVecCount)]); - err = -1; - } - else - { - log_info("CLAMP double%d test passed\n", g_arrVecSizes[(i-kTotalVecCount)]); - err = 0; - } +//-------------------------------------------------------------------------- - if (err) - break; - } +cl_int ClampTest::Run() +{ + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_clamp_fn(device, context, queue, num_elems); + test_error(error, "ClampTest::Run failed"); } + error = test_clamp_fn(device, context, queue, num_elems); + test_error(error, "ClampTest::Run failed"); - for( i = 0; i < ((test_double) ? 8 : 4); i++ ) - { - clReleaseMemObject(streams[i]); - } - for (i=0; i < ((test_double) ? kTotalVecCount * 2-1 : kTotalVecCount); i++) + if (is_extension_available(device, "cl_khr_fp64")) { - clReleaseKernel(kernel[i]); - clReleaseProgram(program[i]); - } - free(input_ptr[0]); - free(input_ptr[1]); - free(input_ptr[2]); - free(output_ptr); - free(program); - free(kernel); - if (test_double) { - free(input_ptr_double[0]); - free(input_ptr_double[1]); - free(input_ptr_double[2]); - free(output_ptr_double); + error = test_clamp_fn(device, context, queue, num_elems); + test_error(error, "ClampTest::Run failed"); } - return err; + return error; } +//-------------------------------------------------------------------------- + +int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems); +} +//-------------------------------------------------------------------------- diff --git a/test_conformance/commonfns/test_degrees.cpp b/test_conformance/commonfns/test_degrees.cpp deleted file mode 100644 index 17311ba83..000000000 --- a/test_conformance/commonfns/test_degrees.cpp +++ /dev/null @@ -1,470 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -#ifndef M_PI -#define M_PI 3.14159265358979323846264338327950288 -#endif - -static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -const char *degrees_kernel_code = -"__kernel void test_degrees(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees2_kernel_code = -"__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees4_kernel_code = -"__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees8_kernel_code = -"__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees16_kernel_code = -"__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees3_kernel_code = -"__kernel void test_degrees3(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(degrees(vload3(tid,src)),tid,dst);\n" -"}\n"; - - -#define MAX_ERR 2.0f - -static int -verify_degrees(float *inptr, float *outptr, int n) -{ - float error, max_error = 0.0f; - double r, max_val = NAN; - int i, j, max_index = 0; - - for (i=0,j=0; i max_error) - { - max_error = error; - max_index = i; - max_val = r; - if( fabsf(error) > MAX_ERR) - { - log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); - return 1; - } - } - } - - log_info( "degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); - - return 0; -} - -int -test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_float *input_ptr[1], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i max_error) - { - max_error = error; - max_index = i; - max_val = r; - if( fabsf(error) > MAX_ERR) - { - log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); - return 1; - } - } - } - - log_info( "degreesd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); - - return 0; -} - -static int -test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_double *input_ptr[1], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - // TODO: line below is clearly wrong - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static const char *fmax_kernel_code = - "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax2_kernel_code = - "__kernel void test_fmax2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax4_kernel_code = - "__kernel void test_fmax4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax8_kernel_code = - "__kernel void test_fmax8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax16_kernel_code = - "__kernel void test_fmax16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - - -static const char *fmax3_kernel_code = - "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " vstore3(fmax(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" - "}\n"; - -static int -verify_fmax(float *inptrA, float *inptrB, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i= inptrB[i]) ? inptrA[i] : inptrB[i]; - if (r != outptr[i]) - return -1; - } - - return 0; -} - -int -test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static const char *fmax_kernel_code = - "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax2_kernel_code = - "__kernel void test_fmax2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax4_kernel_code = - "__kernel void test_fmax4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax8_kernel_code = - "__kernel void test_fmax8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax16_kernel_code = - "__kernel void test_fmax16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax3_kernel_code = - "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " vstore3(fmax(vload3(tid,srcA), srcB[tid]),tid,dst);\n" - "}\n"; - -static int -verify_fmax(float *inptrA, float *inptrB, float *outptr, int n, int veclen) -{ - float r; - int i, j; - - for (i=0; i= inptrB[ii]) ? inptrA[i] : inptrB[ii]; - if (r != outptr[i]) { - log_info("Verify noted discrepancy at %d (of %d) (vec %d, pos %d)\n", - i,n,ii,j); - log_info("SHould be %f, is %f\n", r, outptr[i]); - log_info("Taking max of (%f,%f)\n", inptrA[i], inptrB[i]); - return -1; - } - } - } - - return 0; -} - -int -test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static const char *fmin_kernel_code = - "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin2_kernel_code = - "__kernel void test_fmin2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin4_kernel_code = - "__kernel void test_fmin4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin8_kernel_code = - "__kernel void test_fmin8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin16_kernel_code = - "__kernel void test_fmin16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - - -static const char *fmin3_kernel_code = - "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " vstore3(fmin(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" - "}\n"; - -int -verify_fmin(float *inptrA, float *inptrB, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i inptrB[i]) ? inptrB[i] : inptrA[i]; - if (r != outptr[i]) - return -1; - } - - return 0; -} - -int -test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1));; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static const char *fmin_kernel_code = - "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin2_kernel_code = - "__kernel void test_fmin2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin4_kernel_code = - "__kernel void test_fmin4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin8_kernel_code = - "__kernel void test_fmin8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin16_kernel_code = - "__kernel void test_fmin16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin3_kernel_code = - "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " vstore3(fmin(vload3(tid,srcA), srcB[tid]),tid,dst);\n" - "}\n"; - -static int -verify_fmin(float *inptrA, float *inptrB, float *outptr, int n, int veclen) -{ - float r; - int i, j; - - for (i=0; i inptrB[ii]) ? inptrB[ii] : inptrA[i]; - if (r != outptr[i]) - return -1; - } - } - - return 0; -} - -int -test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements * vecSize; i++ ) - { - float v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ]; - if( v != out[ i ] ) - { - log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", - i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); - return -1; - } - } - return 0; -} - -static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements * vecSize; i++ ) - { - double v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ]; - if( v != out[ i ] ) - { - log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", - i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); - return -1; - } - } - return 0; -} - -int test_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - return test_binary_fn( device, context, queue, n_elems, "max", true, max_verify_float, max_verify_double ); -} - - diff --git a/test_conformance/commonfns/test_maxf.cpp b/test_conformance/commonfns/test_maxf.cpp deleted file mode 100644 index f96df7ea9..000000000 --- a/test_conformance/commonfns/test_maxf.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements; i++ ) - { - for( int j = 0; j < vecSize; j++ ) - { - float v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ]; - if( v != out[ i * vecSize + j ] ) - { - log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); - return -1; - } - } - } - return 0; -} - -static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements; i++ ) - { - for( int j = 0; j < vecSize; j++ ) - { - double v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ]; - if( v != out[ i * vecSize + j ] ) - { - log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); - return -1; - } - } - } - return 0; -} - -int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - return test_binary_fn( device, context, queue, n_elems, "max", false, max_verify_float, max_verify_double ); -} - - diff --git a/test_conformance/commonfns/test_min.cpp b/test_conformance/commonfns/test_min.cpp deleted file mode 100644 index 707e24b66..000000000 --- a/test_conformance/commonfns/test_min.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements * vecSize; i++ ) - { - float v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ]; - if( v != out[ i ] ) { - log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); - return -1; - } - } - return 0; -} - -static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements * vecSize; i++ ) - { - double v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ]; - if( v != out[ i ] ) { - log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); - return -1; - } - } - return 0; -} - -int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - return test_binary_fn( device, context, queue, n_elems, "min", true, min_verify_float, min_verify_double ); -} - - diff --git a/test_conformance/commonfns/test_minf.cpp b/test_conformance/commonfns/test_minf.cpp deleted file mode 100644 index 71b1fbe0a..000000000 --- a/test_conformance/commonfns/test_minf.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" -#include "harness/errorHelpers.h" - -static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements; i++ ) - { - for( int j = 0; j < vecSize; j++ ) - { - float v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ]; - if( v != out[ i * vecSize + j ] ) - { - log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); - return -1; - } - } - } - return 0; -} - -static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) -{ - int maxFail = 1; - int numFails = 0; - for( int i = 0; i < numElements; i++ ) - { - for( int j = 0; j < vecSize; j++ ) - { - double v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ]; - if( v != out[ i * vecSize + j ] ) - { - log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); - ++numFails; - if(numFails >= maxFail) { - return -1; - } - } - } - } - return 0; -} - -int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - return test_binary_fn( device, context, queue, n_elems, "min", false, min_verify_float, min_verify_double ); -} - - diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index 88f382d3f..32b48613c 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,181 +13,309 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" - #include #include #include #include #include "procs.h" +#include "test_base.h" + +//-------------------------------------------------------------------------- + +const char *mix_fn_code_pattern = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s " + "*a, __global %s%s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = mix(x[tid], y[tid], a[tid]);\n" + "}\n"; -const char *mix_kernel_code = -"__kernel void test_mix(__global float *srcA, __global float *srcB, __global float *srcC, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = mix(srcA[tid], srcB[tid], srcC[tid]);\n" -"}\n"; +const char *mix_fn_code_pattern_v3 = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, " + "__global %s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " vstore3(mix(vload3(tid, x), vload3(tid, y), vload3(tid, a)), tid, " + "dst);\n" + "}\n"; + +const char *mix_fn_code_pattern_v3_scalar = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, " + "__global %s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n" + "}\n"; + +//-------------------------------------------------------------------------- #define MAX_ERR 1e-3 -float -verify_mix(float *inptrA, float *inptrB, float *inptrC, float *outptr, int n) +namespace { + +//-------------------------------------------------------------------------- +template +int verify_mix(const T *const inptrX, const T *const inptrY, + const T *const inptrA, const T *const outptr, const int n, + const int veclen, const bool vecParam) { - float r, delta, max_err = 0.0f; - int i; + double r; + float delta = 0.0f; + int i; - for (i=0; i max_err) max_err = delta; + for (i = 0; i < n * veclen; i++) + { + r = conv_to_dbl(inptrX[i]) + + ((conv_to_dbl(inptrY[i]) - conv_to_dbl(inptrX[i])) + * conv_to_dbl(inptrA[i])); + delta = fabs(double(r - conv_to_dbl(outptr[i]))) / r; + if (delta > MAX_ERR) + { + if (std::is_same::value) + log_error("%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, conv_to_flt(inptrX[i]), conv_to_flt(inptrY[i]), + conv_to_flt(inptrA[i]), r, + conv_to_flt(outptr[i])); + else + log_error("%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); + return -1; + } + } } - return max_err; + else + { + for (int i = 0; i < n; ++i) + { + int ii = i / veclen; + int vi = i * veclen; + for (int j = 0; j < veclen; ++j, ++vi) + { + r = conv_to_dbl(inptrX[vi]) + + ((conv_to_dbl(inptrY[vi]) - conv_to_dbl(inptrX[vi])) + * conv_to_dbl(inptrA[i])); + delta = fabs(double(r - conv_to_dbl(outptr[vi]))) / r; + if (delta > MAX_ERR) + { + if (std::is_same::value) + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, conv_to_flt(inptrX[vi]), + conv_to_flt(inptrY[vi]), conv_to_flt(inptrA[i]), r, + conv_to_flt(outptr[vi])); + else + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, + outptr[vi]); + return -1; + } + } + } + } + + return 0; } +} // namespace -int -test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +//-------------------------------------------------------------------------- +template +int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems, bool vecParam) { - cl_mem streams[4]; - cl_float *input_ptr[3], *output_ptr, *p; - cl_program program; - cl_kernel kernel; - size_t lengths[1]; - size_t threads[1]; - float max_err; - int err; - int i; - MTdata d; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + clMemWrapper streams[4]; + std::vector input_ptr[3], output_ptr; - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + std::vector programs; + std::vector kernels; - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i::value) { - p[i] = (float) genrand_real1(d); + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; } - free_mtdata(d); d = NULL; - err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL ); - if (err != CL_SUCCESS) + d = init_genrand(gRandomSeed); + if (std::is_same::value) { - log_error("clWriteArray failed\n"); - return -1; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[1][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[2][i] = conv_to_half((float)genrand_real1(d)); + } } - err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL ); - if (err != CL_SUCCESS) + else { - log_error("clWriteArray failed\n"); - return -1; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = (T)genrand_real1(d); + input_ptr[1][i] = (T)genrand_real1(d); + input_ptr[2][i] = (T)genrand_real1(d); + } } - err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL ); - if (err != CL_SUCCESS) + free_mtdata(d); + + for (i = 0; i < 3; i++) { - log_error("clWriteArray failed\n"); - return -1; + err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, + sizeof(T) * num_elements, + &input_ptr[i].front(), 0, NULL, NULL); + test_error(err, "Unable to write input buffer"); } - lengths[0] = strlen(mix_kernel_code); - err = create_single_kernel_helper( context, &program, &kernel, 1, &mix_kernel_code, "test_mix" ); - test_error( err, "Unable to create test kernel" ); + char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] ); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] ); - err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2] ); - err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3] ); - if (err != CL_SUCCESS) + for (i = 0; i < kTotalVecCount; i++) { - log_error("clSetKernelArgs failed\n"); - return -1; - } + std::string kernelSource; + if (i >= kVectorSizeCount) + { + if (vecParam) + { + std::string str = mix_fn_code_pattern_v3; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); + } + else + { + std::string str = mix_fn_code_pattern_v3_scalar; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); + } + } + else + { + // regular path + std::string str = mix_fn_code_pattern; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i], + tname.c_str(), vecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i]); + } + const char *programPtr = kernelSource.c_str(); + err = + create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char **)&programPtr, "test_fn"); + test_error(err, "Unable to create kernel"); - threads[0] = (size_t)num_elements; - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } + for (int j = 0; j < 4; j++) + { + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); + test_error(err, "Unable to set kernel argument"); + } - err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; + size_t threads = (size_t)n_elems; + + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + test_error(err, "Unable to execute kernel"); + + err = clEnqueueReadBuffer(queue, streams[3], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + test_error(err, "Unable to read results"); + + if (verify_mix(&input_ptr[0].front(), &input_ptr[1].front(), + &input_ptr[2].front(), &output_ptr.front(), n_elems, + g_arrVecSizes[i], vecParam)) + { + log_error("mix %s%d%s test failed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = -1; + } + else + { + log_info("mix %s%d%s test passed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = 0; + } + + if (err) break; } - max_err = verify_mix(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements); - if (max_err > MAX_ERR) + return err; +} + +//-------------------------------------------------------------------------- +cl_int MixTest::Run() +{ + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) { - log_error("MIX test failed %g max err\n", max_err); - err = -1; + error = test_mix_fn(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run failed"); } - else + + error = test_mix_fn(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) { - log_info("MIX test passed %g max err\n", max_err); - err = 0; + error = + test_mix_fn(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run failed"); } - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr[0]); - free(input_ptr[1]); - free(input_ptr[2]); - free(output_ptr); - - return err; + return error; } +//-------------------------------------------------------------------------- +int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "mix", + true); +} +//-------------------------------------------------------------------------- +int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "mix", + false); +} - - +//-------------------------------------------------------------------------- diff --git a/test_conformance/commonfns/test_radians.cpp b/test_conformance/commonfns/test_radians.cpp deleted file mode 100644 index 2eb0500f2..000000000 --- a/test_conformance/commonfns/test_radians.cpp +++ /dev/null @@ -1,468 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -#ifndef M_PI -#define M_PI 3.14159265358979323846264338327950288 -#endif - -static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -const char *radians_kernel_code = -"__kernel void test_radians(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians2_kernel_code = -"__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians4_kernel_code = -"__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians8_kernel_code = -"__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians16_kernel_code = -"__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians3_kernel_code = -"__kernel void test_radians3(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(radians(vload3(tid,src)),tid,dst);\n" -"}\n"; - - -#define MAX_ERR 2.0f - -static float -verify_radians(float *inptr, float *outptr, int n) -{ - float error, max_error = 0.0f; - double r, max_val = NAN; - int i, j, max_index = 0; - - for (i=0,j=0; i max_error) - { - max_error = error; - max_index = i; - max_val = r; - if( fabsf(error) > MAX_ERR) - { - log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); - return 1; - } - } - } - - log_info( "radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); - - return 0; -} - - -int -test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_float *input_ptr[1], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i max_error) - { - max_error = error; - max_index = i; - max_val = r; - if( fabsf(error) > MAX_ERR) - { - log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); - return 1; - } - } - } - - log_info( "radiansd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); - - return 0; -} - - -int -test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_double *input_ptr[1], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - //TODO: line below is clearly wrong - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static int -test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -const char *sign_kernel_code = -"__kernel void test_sign(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign2_kernel_code = -"__kernel void test_sign2(__global float2 *src, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign4_kernel_code = -"__kernel void test_sign4(__global float4 *src, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign8_kernel_code = -"__kernel void test_sign8(__global float8 *src, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign16_kernel_code = -"__kernel void test_sign16(__global float16 *src, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign3_kernel_code = -"__kernel void test_sign3(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(sign(vload3(tid,src)), tid, dst);\n" -"}\n"; - - - -static int -verify_sign(float *inptr, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i 0.0f) - r = 1.0f; - else if (inptr[i] < 0.0f) - r = -1.0f; - else - r = 0.0f; - if (r != outptr[i]) - return -1; - } - - return 0; -} - -static const char *fn_names[] = { "SIGN float", "SIGN float2", "SIGN float4", "SIGN float8", "SIGN float16", "SIGN float3" }; - -int -test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_float *input_ptr[1], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - num_elements = n_elems * 16; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i 0.0) - r = 1.0; - else if (inptr[i] < 0.0) - r = -1.0; - else - r = 0.0f; - if (r != outptr[i]) - return -1; - } - - return 0; -} - -static const char *fn_names_double[] = { "SIGN double", "SIGN double2", "SIGN double4", "SIGN double8", "SIGN double16", "SIGN double3" }; - -int -test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_double *input_ptr[1], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - num_elements = n_elems * 16; - - input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i #include #include #include #include "procs.h" - -static const char *smoothstep_kernel_code = -"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep2_kernel_code = -"__kernel void test_smoothstep2(__global float2 *edge0, __global float2 *edge1, __global float2 *x, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep4_kernel_code = -"__kernel void test_smoothstep4(__global float4 *edge0, __global float4 *edge1, __global float4 *x, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep8_kernel_code = -"__kernel void test_smoothstep8(__global float8 *edge0, __global float8 *edge1, __global float8 *x, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep16_kernel_code = -"__kernel void test_smoothstep16(__global float16 *edge0, __global float16 *edge1, __global float16 *x, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep3_kernel_code = -"__kernel void test_smoothstep3(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(smoothstep(vload3(tid,edge0),vload3(tid,edge1),vload3(tid,x)), tid, dst);\n" -"}\n"; +#include "test_base.h" + +//-------------------------------------------------------------------------- + +const char *smoothstep_fn_code_pattern = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s " + "*x, __global %s%s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = smoothstep(e0[tid], e1[tid], x[tid]);\n" + "}\n"; + +const char *smoothstep_fn_code_pattern_v3 = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, " + "__global %s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " vstore3(smoothstep(vload3(tid,e0), vload3(tid,e1), vload3(tid,x)), " + "tid, dst);\n" + "}\n"; + +const char *smoothstep_fn_code_pattern_v3_scalar = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, " + "__global %s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n" + "}\n"; + +//-------------------------------------------------------------------------- #define MAX_ERR (1e-5f) -static float -verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n) +namespace { + +//-------------------------------------------------------------------------- +template +int verify_smoothstep(const T *const edge0, const T *const edge1, + const T *const x, const T *const outptr, const int n, + const int veclen, const bool vecParam) { - float r, t, delta, max_err = 0.0f; - int i; - - for (i=0; i 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[i]); - if (delta > max_err) - max_err = delta; - } - - return max_err; + double r, t; + float delta = 0, max_delta = 0; + + if (vecParam) + { + for (int i = 0; i < n * veclen; i++) + { + t = (conv_to_dbl(x[i]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[i])); + if (!std::is_same::value) + { + if (delta > MAX_ERR) + { + log_error( + "%d) verification error: smoothstep(%a, %a, %a) = " + "*%a vs. %a\n", + i, x[i], edge0[i], edge1[i], r, outptr[i]); + return -1; + } + } + else + max_delta = std::max(max_delta, delta); + } + } + else + { + for (int i = 0; i < n; ++i) + { + int ii = i / veclen; + int vi = i * veclen; + for (int j = 0; j < veclen; ++j, ++vi) + { + t = (conv_to_dbl(x[vi]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[vi])); + + if (!std::is_same::value) + { + if (delta > MAX_ERR) + { + log_error("{%d, element %d}) verification error: " + "smoothstep(%a, %a, %a) = *%a vs. %a\n", + ii, j, x[vi], edge0[i], edge1[i], r, + outptr[vi]); + return -1; + } + } + else + max_delta = std::max(max_delta, delta); + } + } + } + + if (std::is_same::value) + log_error("smoothstep half verification result, max delta: %a\n", + max_delta); + + return 0; } -const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4", "SMOOTHSTEP float8", "SMOOTHSTEP float16", "SMOOTHSTEP float3" }; +} -int -test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +//-------------------------------------------------------------------------- +template +int test_smoothstep_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems, bool vecParam) { - cl_mem streams[4]; - cl_float *input_ptr[3], *output_ptr, *p, *p_edge0; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - float max_err; - int num_elements; - int err; - int i; - MTdata d; - - num_elements = n_elems * 16; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i input_ptr[3], output_ptr; + + std::vector programs; + std::vector kernels; + + int err, i; + MTdata d; + + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; + + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); + + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); + + for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements); + output_ptr.resize(num_elements); + + for (i = 0; i < 4; i++) { - log_error("clSetKernelArgs failed\n"); - return -1; + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); } - } + std::string pragma_str; + d = init_genrand(gRandomSeed); + if (std::is_same::value) + { + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = get_random_float(-0x00200000, 0x00010000, d); + input_ptr[1][i] = get_random_float(input_ptr[0][i], 0x00200000, d); + input_ptr[2][i] = get_random_float(-0x20000000, 0x20000000, d); + } + } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = get_random_double(-0x00200000, 0x00010000, d); + input_ptr[1][i] = get_random_double(input_ptr[0][i], 0x00200000, d); + input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d); + } + } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half(get_random_float(-65503, 65503, d)); + input_ptr[1][i] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[0][i]), 65503, d)); + input_ptr[2][i] = conv_to_half(get_random_float(-65503, 65503, d)); + } + } - threads[0] = (size_t)n_elems; - for (i=0; i= kVectorSizeCount) + { + if (vecParam) + { + std::string str = smoothstep_fn_code_pattern_v3; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); + } + else + { + std::string str = smoothstep_fn_code_pattern_v3_scalar; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); + } + } + else + { + // regular path + std::string str = smoothstep_fn_code_pattern; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + } + const char *programPtr = kernelSource.c_str(); + err = + create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char **)&programPtr, "test_fn"); + test_error(err, "Unable to create kernel"); + + for (int j = 0; j < 4; j++) + { + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); + test_error(err, "Unable to set kernel argument"); + } + + size_t threads = (size_t)n_elems; + + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + test_error(err, "Unable to execute kernel"); + + err = clEnqueueReadBuffer(queue, streams[3], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + test_error(err, "Unable to read results"); + + if (verify_smoothstep((T *)&input_ptr[0].front(), + (T *)&input_ptr[1].front(), + (T *)&input_ptr[2].front(), &output_ptr[0], + n_elems, g_arrVecSizes[i], vecParam)) + { + log_error("smoothstep %s%d%s test failed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = -1; + } + else + { + log_info("smoothstep %s%d%s test passed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = 0; + } + + if (err) break; } - max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * g_arrVecSizes[i]); + return err; +} - if (max_err > MAX_ERR) +//-------------------------------------------------------------------------- +cl_int SmoothstepTest::Run() +{ + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) { - log_error("%s test failed %g max err\n", fn_names[i], max_err); - err = -1; + error = test_smoothstep_fn(device, context, queue, num_elems, + vecParam); + test_error(error, "SmoothstepTest::Run failed"); } - else + + error = + test_smoothstep_fn(device, context, queue, num_elems, vecParam); + test_error(error, "SmoothstepTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) { - log_info("%s test passed %g max err\n", fn_names[i], max_err); - err = 0; + error = test_smoothstep_fn(device, context, queue, num_elems, + vecParam); + test_error(error, "SmoothstepTest::Run failed"); } - if (err) - break; - } - - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - for (i=0; i(device, context, queue, n_elems, + "smoothstep", true); +} + +//-------------------------------------------------------------------------- +int test_smoothstepf(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, + "smoothstep", false); +} +//-------------------------------------------------------------------------- diff --git a/test_conformance/commonfns/test_smoothstepf.cpp b/test_conformance/commonfns/test_smoothstepf.cpp deleted file mode 100644 index ac09e9ec1..000000000 --- a/test_conformance/commonfns/test_smoothstepf.cpp +++ /dev/null @@ -1,259 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -static const char *smoothstep_kernel_code = -"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep2_kernel_code = -"__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep4_kernel_code = -"__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -#define MAX_ERR (1e-5f) - -float verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, - int n, int veclen) -{ - float r, t, delta, max_err = 0.0f; - int i, j; - - for (i = 0; i < n; ++i) { - int vi = i * veclen; - for (j = 0; j < veclen; ++j, ++vi) { - t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[vi]); - if (delta > max_err) - max_err = delta; - } - } - return max_err; -} - -const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"}; - -int -test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[4]; - cl_float *input_ptr[3], *output_ptr, *p, *p_edge0; - cl_program program[3]; - cl_kernel kernel[3]; - size_t threads[1]; - float max_err = 0.0f; - int num_elements; - int err; - int i; - MTdata d; - - num_elements = n_elems * 4; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i MAX_ERR) - { - log_error("%s test failed %g max err\n", fn_names[i], max_err); - err = -1; - } - else - { - log_info("%s test passed %g max err\n", fn_names[i], max_err); - err = 0; - } - - if (err) - break; - } - - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - for (i=0; i<3; i++) - { - clReleaseKernel(kernel[i]); - clReleaseProgram(program[i]); - } - free(input_ptr[0]); - free(input_ptr[1]); - free(input_ptr[2]); - free(output_ptr); - - return err; -} - - diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index ed5bc4182..fb8b96c9c 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,524 +13,282 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" - #include #include #include #include #include "procs.h" - -static int -test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -const char *step_kernel_code = -"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step2_kernel_code = -"__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step4_kernel_code = -"__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step8_kernel_code = -"__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step16_kernel_code = -"__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step3_kernel_code = -"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" -"}\n"; - - -int -verify_step(float *inptrA, float *inptrB, float *outptr, int n) +#include "test_base.h" + +//-------------------------------------------------------------------------- + +const char *step_fn_code_pattern = "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s%s *edge, " + "__global %s%s *x, __global %s%s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = step(edge[tid], x[tid]);\n" + "}\n"; + +const char *step_fn_code_pattern_v3 = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *edge, __global %s *x, __global %s " + "*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " vstore3(step(vload3(tid,edge), vload3(tid,x)), tid, dst);\n" + "}\n"; + +const char *step_fn_code_pattern_v3_scalar = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *edge, __global %s *x, __global %s " + "*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n" + "}\n"; + +//-------------------------------------------------------------------------- + +namespace { + +template +int verify_step(const T *const inptrA, const T *const inptrB, + const T *const outptr, const int n, const int veclen, + const bool vecParam) { - float r; - int i; - - for (i=0; i::value) + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, conv_to_flt(inptrA[ii]), + conv_to_flt(inptrB[i]), r, conv_to_flt(outptr[i])); + else + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, inptrA[ii], inptrB[i], r, outptr[i]); + return -1; + } + } } - - if (err) - break; } - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - for (i=0; i +int test_step_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems, bool vecParam) +{ + clMemWrapper streams[3]; + std::vector input_ptr[2], output_ptr; - return test_step_double( device, context, queue, n_elems); -} + std::vector programs; + std::vector kernels; + int err, i; + MTdata d; -#pragma mark - - -const char *step_kernel_code_double = -"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" -"__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step2_kernel_code_double = -"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" -"__kernel void test_step2_double(__global double2 *srcA, __global double2 *srcB, __global double2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step4_kernel_code_double = -"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" -"__kernel void test_step4_double(__global double4 *srcA, __global double4 *srcB, __global double4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step8_kernel_code_double = -"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" -"__kernel void test_step8_double(__global double8 *srcA, __global double8 *srcB, __global double8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step16_kernel_code_double = -"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" -"__kernel void test_step16_double(__global double16 *srcA, __global double16 *srcB, __global double16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step3_kernel_code_double = -"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" -"__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" -"}\n"; - - -int -verify_step_double(double *inptrA, double *inptrB, double *outptr, int n) -{ - double r; - int i; + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); - for (i=0; i::value) { - p[i] = get_random_double(-0x40000000, 0x40000000, d); + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = get_random_float(-0x40000000, 0x40000000, d); + input_ptr[1][i] = get_random_float(-0x40000000, 0x40000000, d); + } } - p = input_ptr[1]; - for (i=0; i::value) { - p[i] = get_random_double(-0x40000000, 0x40000000, d); + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = get_random_double(-0x40000000, 0x40000000, d); + input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d); + } } - free_mtdata(d); d = NULL; - - err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL ); - if (err != CL_SUCCESS) + else if (std::is_same::value) { - log_error("clWriteArray failed\n"); - return -1; + const float fval = 0x40000000; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][i] = conv_to_half(get_random_float(-fval, fval, d)); + } } - err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL ); - if (err != CL_SUCCESS) + free_mtdata(d); + + for (i = 0; i < 2; i++) { - log_error("clWriteArray failed\n"); - return -1; + err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, + sizeof(T) * num_elements, + &input_ptr[i].front(), 0, NULL, NULL); + test_error(err, "Unable to write input buffer"); } - err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" ); - if (err) - return -1; - - for (i=0; i < kTotalVecCount; i++) + char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + + for (i = 0; i < kTotalVecCount; i++) { - err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] ); - err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] ); - err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] ); - if (err != CL_SUCCESS) + std::string kernelSource; + if (i >= kVectorSizeCount) { - log_error("clSetKernelArgs failed\n"); - return -1; + if (vecParam) + { + std::string str = step_fn_code_pattern_v3; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); + } + else + { + std::string str = step_fn_code_pattern_v3_scalar; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); + } } - } - - threads[0] = (size_t)n_elems; - for (i=0; i(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run failed"); } - free(input_ptr[0]); - free(input_ptr[1]); - free(output_ptr); - return err; + error = test_step_fn(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = + test_step_fn(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run failed"); + } + + return error; +} + +//-------------------------------------------------------------------------- +int test_step(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "step", + true); +} + +//-------------------------------------------------------------------------- +int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "step", + false); } +//-------------------------------------------------------------------------- diff --git a/test_conformance/commonfns/test_stepf.cpp b/test_conformance/commonfns/test_stepf.cpp deleted file mode 100644 index efada227a..000000000 --- a/test_conformance/commonfns/test_stepf.cpp +++ /dev/null @@ -1,546 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -static const char *step_kernel_code = -"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step2_kernel_code = -"__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step4_kernel_code = -"__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step8_kernel_code = -"__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step16_kernel_code = -"__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step3_kernel_code = -"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n" -"}\n"; - - -static int -verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen) -{ - float r; - int i, j; - - for (i=0; i *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] ); - return -1; - } - } - } - - return 0; -} - -int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - num_elements = n_elems * 16; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] ); - return -1; - } - } - } - - return 0; -} - -int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_double *input_ptr[2], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - num_elements = n_elems * 16; - - input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i +#include +#include +#include + +#include + +#include "harness/deviceInfo.h" +#include "harness/typeWrappers.h" + +#include "procs.h" +#include "test_base.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif + +//-------------------------------------------------------------------------- +// clang-format off +const char *unary_fn_code_pattern = +"%s\n" /* optional pragma */ +"__kernel void test_fn(__global %s%s *src, __global %s%s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = %s(src[tid]);\n" +"}\n"; + +const char *unary_fn_code_pattern_v3 = +"%s\n" /* optional pragma */ +"__kernel void test_fn(__global %s *src, __global %s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(%s(vload3(tid,src)), tid, dst);\n" +"}\n"; +// clang-format on +//-------------------------------------------------------------------------- + +#define MAX_ERR 2.0f + +namespace { + +//-------------------------------------------------------------------------- +template +int verify_degrees(const T *const inptr, const T *const outptr, int n) +{ + float error, max_error = 0.0f; + double r, max_val = NAN; + int max_index = 0; + + for (int i = 0, j = 0; i < n; i++, j++) + { + r = (180.0 / M_PI) * conv_to_dbl(inptr[i]); + + if (std::is_same::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; + + error = UlpFn(outptr[i], r); + + if (fabsf(error) > max_error) + { + max_error = error; + max_index = i; + max_val = r; + if (fabsf(error) > MAX_ERR) + { + if (std::is_same::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); + return 1; + } + } + } + + if (std::is_same::value) + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); + + return 0; +} + +//-------------------------------------------------------------------------- +template +int verify_radians(const T *const inptr, const T *const outptr, int n) +{ + float error, max_error = 0.0f; + double r, max_val = NAN; + int max_index = 0; + + for (int i = 0, j = 0; i < n; i++, j++) + { + r = (M_PI / 180.0) * conv_to_dbl(inptr[i]); + + if (std::is_same::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; + + error = UlpFn(outptr[i], r); + if (fabsf(error) > max_error) + { + max_error = error; + max_index = i; + max_val = r; + if (fabsf(error) > MAX_ERR) + { + if (std::is_same::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); + return 1; + } + } + } + + if (std::is_same::value) + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); + + return 0; +} + +//-------------------------------------------------------------------------- +template +int verify_sign(const T *const inptr, const T *const outptr, int n) +{ + double r = 0; + for (int i = 0; i < n; i++) + { + if (conv_to_dbl(inptr[i]) > 0.0f) + r = 1.0; + else if (conv_to_dbl(inptr[i]) < 0.0f) + r = -1.0; + else + r = 0.0; + if (r != conv_to_dbl(outptr[i])) return -1; + } + return 0; +} + +} + +//-------------------------------------------------------------------------- + +template +int test_unary_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems, + const std::string &fnName, VerifyFuncUnary verifyFn) +{ + clMemWrapper streams[2]; + std::vector input_ptr, output_ptr; + + std::vector programs; + std::vector kernels; + + int err, i; + MTdata d; + + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; + + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); + + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); + + input_ptr.resize(num_elements); + output_ptr.resize(num_elements); + + for (i = 0; i < 2; i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); + } + + std::string pragma_str; + d = init_genrand(gRandomSeed); + if (std::is_same::value) + { + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = get_random_float((float)(-100000.f * M_PI), + (float)(100000.f * M_PI), d); + } + } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = + get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d); + } + } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = conv_to_half(get_random_float( + (float)(-65503.f * M_PI), (float)(65503.f * M_PI), d)); + } + } + free_mtdata(d); + + err = clEnqueueWriteBuffer(queue, streams[0], true, 0, + sizeof(T) * num_elements, &input_ptr.front(), 0, + NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteArray failed\n"); + return -1; + } + + for (i = 0; i < kTotalVecCount; i++) + { + std::string kernelSource; + char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + + if (i >= kVectorSizeCount) + { + std::string str = unary_fn_code_pattern_v3; + kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), fnName.c_str()); + } + else + { + std::string str = unary_fn_code_pattern; + kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), + vecSizeNames[i], fnName.c_str()); + } + + /* Create kernels */ + const char *programPtr = kernelSource.c_str(); + err = + create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char **)&programPtr, "test_fn"); + + err = clSetKernelArg(kernels[i], 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernels[i], 1, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + // Line below is troublesome... + size_t threads = (size_t)num_elements / ((g_arrVecSizes[i])); + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + cl_uint dead = 42; + memset_pattern4(&output_ptr[0], &dead, sizeof(T) * num_elements); + err = clEnqueueReadBuffer(queue, streams[1], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + if (verifyFn((T *)&input_ptr.front(), (T *)&output_ptr.front(), + n_elems * (i + 1))) + { + log_error("%s %s%d test failed\n", fnName.c_str(), tname.c_str(), + ((g_arrVecSizes[i]))); + err = -1; + } + else + { + log_info("%s %s%d test passed\n", fnName.c_str(), tname.c_str(), + ((g_arrVecSizes[i]))); + } + + if (err) break; + } + + if (err) return err; + + return err; +} + +//-------------------------------------------------------------------------- +cl_int DegreesTest::Run() +{ + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); + test_error(error, "DegreesTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); + test_error(error, "DegreesTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); + test_error(error, "DegreesTest::Run failed"); + } + + return error; +} + +//-------------------------------------------------------------------------- +cl_int RadiansTest::Run() +{ + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_radians); + test_error(error, "RadiansTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_radians); + test_error(error, "RadiansTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_radians); + test_error(error, "RadiansTest::Run failed"); + } + + return error; +} + +//-------------------------------------------------------------------------- +cl_int SignTest::Run() +{ + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); + test_error(error, "SignTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); + test_error(error, "SignTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); + test_error(error, "SignTest::Run failed"); + } + + return error; +} + +//-------------------------------------------------------------------------- +int test_degrees(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, + "degrees"); +} + +//-------------------------------------------------------------------------- +int test_radians(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, + "radians"); +} + +//-------------------------------------------------------------------------- +int test_sign(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "sign"); +} + +//-------------------------------------------------------------------------- From 81386e56c438b435d6e023d3f07af74ec6b44e55 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 4 Apr 2023 14:06:40 +0200 Subject: [PATCH 02/11] Added missing header due to presubmit check --- test_conformance/commonfns/test_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index 278e44d55..aa17d4858 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -20,11 +20,11 @@ #include #include #include +#include #include #include -#include "harness/deviceInfo.h" #include "harness/testHarness.h" #include "harness/typeWrappers.h" From 51d7f8a16704c301e9ee534844465e7ba4ebb406 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 23 May 2023 14:25:19 +0200 Subject: [PATCH 03/11] Corrected radians/degrees ulp calculations + cosmetic fixes --- test_conformance/commonfns/test_smoothstep.cpp | 5 +++-- test_conformance/commonfns/test_unary_fn.cpp | 10 ---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index a21e45f1e..67a31ad2d 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -136,7 +136,8 @@ int verify_smoothstep(const T *const edge0, const T *const edge1, template int test_smoothstep_fn(cl_device_id device, cl_context context, - cl_command_queue queue, int n_elems, bool vecParam) + cl_command_queue queue, const int n_elems, + const bool vecParam) { clMemWrapper streams[4]; std::vector input_ptr[3], output_ptr; @@ -206,7 +207,7 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, test_error(err, "Unable to write input buffer"); } - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; for (i = 0; i < kTotalVecCount; i++) { diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index ea7e23580..f855cf748 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -54,16 +54,6 @@ const char *unary_fn_code_pattern_v3 = namespace { -template float UlpFn(const T &val, const double &r) -{ - if (std::is_same::value) - return Ulp_Error_Double(val, r); - else if (std::is_same::value) - return Ulp_Error(val, r); - else if (std::is_same::value) - return Ulp_Error(val, r); -} - template int verify_degrees(const T *const inptr, const T *const outptr, int n) { From 9199c0a502fb66528290fe851366249877a070f8 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 23 May 2023 14:35:23 +0200 Subject: [PATCH 04/11] Corrected presubmit code format --- test_conformance/commonfns/test_smoothstep.cpp | 2 +- test_conformance/commonfns/test_unary_fn.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index 67a31ad2d..d839139fe 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -238,7 +238,7 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, vecParam ? vecSizeNames[i] : "", tname.c_str(), vecParam ? vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i], tname.c_str(), vecSizeNames[i]); - } + } const char *programPtr = kernelSource.c_str(); err = diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index f855cf748..25fefe0bc 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -237,12 +237,12 @@ int test_unary_fn(cl_device_id device, cl_context context, err = clEnqueueWriteBuffer(queue, streams[0], true, 0, sizeof(T) * num_elements, &input_ptr.front(), 0, NULL, NULL); - test_error (err, "clEnqueueWriteBuffer failed\n"); + test_error(err, "clEnqueueWriteBuffer failed\n"); for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; if (i >= kVectorSizeCount) { From 8df048903cf2968589ff947cdd956dd9d617e060 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 23 May 2023 15:03:55 +0200 Subject: [PATCH 05/11] Corrections related to code review --- test_conformance/commonfns/main.cpp | 9 +++------ test_conformance/commonfns/test_base.h | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp index 61d0d794f..645d3f703 100644 --- a/test_conformance/commonfns/main.cpp +++ b/test_conformance/commonfns/main.cpp @@ -75,12 +75,9 @@ int main(int argc, const char *argv[]) { initVecSizes(); - if (BaseFunctionTest::type2name.empty()) - { - BaseFunctionTest::type2name[sizeof(half)] = "half"; - BaseFunctionTest::type2name[sizeof(float)] = "float"; - BaseFunctionTest::type2name[sizeof(double)] = "double"; - } + BaseFunctionTest::type2name[sizeof(half)] = "half"; + BaseFunctionTest::type2name[sizeof(float)] = "float"; + BaseFunctionTest::type2name[sizeof(double)] = "double"; return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, InitCL); diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index 7bb4ab580..18f7d5c9e 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -179,7 +179,7 @@ template float UlpFn(const T &val, const double &r) } else { - log_error("GeometricsFPTest::UlpError: unsupported data type\n"); + log_error("UlpFn: unsupported data type\n"); } return -1.f; // wrong val From f44d7747cf139342e37fc2cf169aceb16337b024 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 8 Jun 2023 11:55:25 +0200 Subject: [PATCH 06/11] Moved string format helper to test_common in separate header --- .../harness/stringHelpers.h | 6 ++-- test_conformance/basic/test_astype.cpp | 2 +- test_conformance/commonfns/test_base.h | 12 ------- test_conformance/commonfns/test_binary_fn.cpp | 7 ++-- test_conformance/commonfns/test_mix.cpp | 8 +++-- .../commonfns/test_smoothstep.cpp | 8 +++-- test_conformance/commonfns/test_step.cpp | 8 +++-- test_conformance/commonfns/test_unary_fn.cpp | 5 +-- .../relationals/test_comparisons_fp.cpp | 33 ++++--------------- 9 files changed, 32 insertions(+), 57 deletions(-) rename test_conformance/basic/utils.h => test_common/harness/stringHelpers.h (94%) diff --git a/test_conformance/basic/utils.h b/test_common/harness/stringHelpers.h similarity index 94% rename from test_conformance/basic/utils.h rename to test_common/harness/stringHelpers.h index 3f6bf64db..a02624d6d 100644 --- a/test_conformance/basic/utils.h +++ b/test_common/harness/stringHelpers.h @@ -14,8 +14,8 @@ // limitations under the License. // -#ifndef BASIC_UTILS_H -#define BASIC_UTILS_H +#ifndef STRING_HELPERS_H +#define STRING_HELPERS_H #include #include @@ -38,4 +38,4 @@ inline std::string str_sprintf(const std::string &str, Args... args) return std::string(buffer.get(), buffer.get() + s - 1); } -#endif // BASIC_UTIL_H +#endif // STRING_HELPERS_H diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp index 08a4cb85a..c29050e0a 100644 --- a/test_conformance/basic/test_astype.cpp +++ b/test_conformance/basic/test_astype.cpp @@ -23,10 +23,10 @@ #include #include "harness/conversions.h" +#include "harness/stringHelpers.h" #include "harness/typeWrappers.h" #include "procs.h" -#include "utils.h" // clang-format off diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index 18f7d5c9e..be36ed264 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -151,18 +151,6 @@ struct MixTest : BaseFunctionTest cl_int Run() override; }; -template -std::string string_format(const std::string &format, Args... args) -{ - int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - if (sformat <= 0) - throw std::runtime_error("string_format: string processing error."); - auto format_size = static_cast(sformat); - std::unique_ptr buffer(new char[format_size]); - std::snprintf(buffer.get(), format_size, format.c_str(), args...); - return std::string(buffer.get(), buffer.get() + format_size - 1); -} - template float UlpFn(const T &val, const double &r) { if (std::is_same::value) diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index d280eb926..e78240a0e 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -22,6 +22,7 @@ #include "harness/deviceInfo.h" #include "harness/typeWrappers.h" +#include "harness/stringHelpers.h" #include "procs.h" #include "test_base.h" @@ -134,14 +135,14 @@ int test_binary_fn(cl_device_id device, cl_context context, { std::string str = binary_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), tname.c_str(), fnName.c_str()); } else { std::string str = binary_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), tname.c_str(), fnName.c_str()); } } @@ -149,7 +150,7 @@ int test_binary_fn(cl_device_id device, cl_context context, { // do regular std::string str = binary_fn_code_pattern; - kernelSource = string_format( + kernelSource = str_sprintf( str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], tname.c_str(), vecSecParam ? vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i], fnName.c_str()); diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index ff75c3227..160cdf9bd 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -18,6 +18,8 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" @@ -205,14 +207,14 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, { std::string str = mix_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = mix_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), tname.c_str(), tname.c_str()); } } @@ -221,7 +223,7 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, // regular path std::string str = mix_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], tname.c_str(), vecSizeNames[i], tname.c_str(), vecParam ? vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i]); diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index d839139fe..376eb97ad 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -18,6 +18,8 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" @@ -218,14 +220,14 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, { std::string str = smoothstep_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = smoothstep_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), tname.c_str(), tname.c_str()); } } @@ -234,7 +236,7 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, // regular path std::string str = smoothstep_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), vecParam ? vecSizeNames[i] : "", tname.c_str(), vecParam ? vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i], tname.c_str(), vecSizeNames[i]); diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index 7c81f26d2..590445baf 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -18,6 +18,8 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" @@ -175,14 +177,14 @@ int test_step_fn(cl_device_id device, cl_context context, { std::string str = step_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = step_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), tname.c_str()); } } @@ -191,7 +193,7 @@ int test_step_fn(cl_device_id device, cl_context context, // regular path std::string str = step_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), + str_sprintf(str, pragma_str.c_str(), tname.c_str(), vecParam ? vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index 25fefe0bc..d706a32ac 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -21,6 +21,7 @@ #include #include "harness/deviceInfo.h" +#include "harness/stringHelpers.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -247,13 +248,13 @@ int test_unary_fn(cl_device_id device, cl_context context, if (i >= kVectorSizeCount) { std::string str = unary_fn_code_pattern_v3; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), tname.c_str(), fnName.c_str()); } else { std::string str = unary_fn_code_pattern; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], tname.c_str(), vecSizeNames[i], fnName.c_str()); } diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp index c3d8f67a3..73ff3dd9e 100644 --- a/test_conformance/relationals/test_comparisons_fp.cpp +++ b/test_conformance/relationals/test_comparisons_fp.cpp @@ -22,6 +22,8 @@ #include #include +#include "harness/stringHelpers.h" + #include #include "test_comparisons_fp.h" @@ -83,29 +85,6 @@ extension, // clang-format on -std::string concat_kernel(const char* sstr[], int num) -{ - std::string res; - for (int i = 0; i < num; i++) res += std::string(sstr[i]); - return res; -} - -template -std::string string_format(const std::string& format, Args... args) -{ - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) - + 1; // Extra space for '\0' - if (size_s <= 0) - { - throw std::runtime_error("Error during formatting."); - } - auto size = static_cast(size_s); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - return std::string(buf.get(), - buf.get() + size - 1); // We don't want the '\0' inside -} - template bool verify(const T& A, const T& B) { return F()(A, B); @@ -226,14 +205,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKerPat_3, sizeof(equivTestKerPat_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKerPatLessGreater_3, sizeof(equivTestKerPatLessGreater_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } else @@ -243,14 +222,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKernPat, sizeof(equivTestKernPat) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKernPatLessGreater, sizeof(equivTestKernPatLessGreater) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } From 6de27ceeaf7c6c6f98d79bb6ec7354c1da6b9d36 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Thu, 8 Jun 2023 11:58:57 +0200 Subject: [PATCH 07/11] Added clang format for last commit --- test_conformance/commonfns/test_binary_fn.cpp | 4 ++-- test_conformance/commonfns/test_mix.cpp | 10 +++++----- test_conformance/commonfns/test_smoothstep.cpp | 10 +++++----- test_conformance/commonfns/test_step.cpp | 8 ++++---- test_conformance/commonfns/test_unary_fn.cpp | 6 +++--- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index e78240a0e..6330a04d3 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -136,14 +136,14 @@ int test_binary_fn(cl_device_id device, cl_context context, std::string str = binary_fn_code_pattern_v3; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + tname.c_str(), tname.c_str(), fnName.c_str()); } else { std::string str = binary_fn_code_pattern_v3_scalar; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + tname.c_str(), tname.c_str(), fnName.c_str()); } } else diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index 160cdf9bd..0c586fe89 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -208,14 +208,14 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, std::string str = mix_fn_code_pattern_v3; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = mix_fn_code_pattern_v3_scalar; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -224,9 +224,9 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, std::string str = mix_fn_code_pattern; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i], - tname.c_str(), vecParam ? vecSizeNames[i] : "", - tname.c_str(), vecSizeNames[i]); + vecSizeNames[i], tname.c_str(), vecSizeNames[i], + tname.c_str(), vecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index 376eb97ad..068a33d8c 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -221,14 +221,14 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, std::string str = smoothstep_fn_code_pattern_v3; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = smoothstep_fn_code_pattern_v3_scalar; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -237,9 +237,9 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, std::string str = smoothstep_fn_code_pattern; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index 590445baf..cea3b6588 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -178,14 +178,14 @@ int test_step_fn(cl_device_id device, cl_context context, std::string str = step_fn_code_pattern_v3; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + tname.c_str(), tname.c_str()); } else { std::string str = step_fn_code_pattern_v3_scalar; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + tname.c_str(), tname.c_str()); } } else @@ -194,8 +194,8 @@ int test_step_fn(cl_device_id device, cl_context context, std::string str = step_fn_code_pattern; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index d706a32ac..8325c6a4d 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -249,14 +249,14 @@ int test_unary_fn(cl_device_id device, cl_context context, { std::string str = unary_fn_code_pattern_v3; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), fnName.c_str()); + tname.c_str(), fnName.c_str()); } else { std::string str = unary_fn_code_pattern; kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), - vecSizeNames[i], fnName.c_str()); + vecSizeNames[i], tname.c_str(), + vecSizeNames[i], fnName.c_str()); } /* Create kernels */ From e29b511da4d279fcd257bb64e15242604f7d54c4 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Wed, 14 Jun 2023 09:51:32 +0200 Subject: [PATCH 08/11] Corrections related to code review --- test_conformance/commonfns/test_mix.cpp | 14 +++++++------- test_conformance/commonfns/test_smoothstep.cpp | 1 + test_conformance/commonfns/test_unary_fn.cpp | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index 0c586fe89..b90778ac7 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -63,7 +63,7 @@ int verify_mix(const T *const inptrX, const T *const inptrY, const T *const inptrA, const T *const outptr, const int n, const int veclen, const bool vecParam) { - double r; + double r, o; float delta = 0.0f; int i; @@ -74,7 +74,9 @@ int verify_mix(const T *const inptrX, const T *const inptrY, r = conv_to_dbl(inptrX[i]) + ((conv_to_dbl(inptrY[i]) - conv_to_dbl(inptrX[i])) * conv_to_dbl(inptrA[i])); - delta = fabs(double(r - conv_to_dbl(outptr[i]))) / r; + + o = conv_to_dbl(outptr[i]); + delta = fabs(double(r - o)) / r; if (delta > MAX_ERR) { if (std::is_same::value) @@ -139,7 +141,7 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, std::vector kernels; int err, i; - MTdata d; + MTdataHolder d(gRandomSeed); assert(BaseFunctionTest::type2name.find(sizeof(T)) != BaseFunctionTest::type2name.end()); @@ -167,10 +169,9 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; } - d = init_genrand(gRandomSeed); - if (std::is_same::value) + if (std::is_same::value) { - pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; for (i = 0; i < num_elements; i++) { input_ptr[0][i] = conv_to_half((float)genrand_real1(d)); @@ -187,7 +188,6 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, input_ptr[2][i] = (T)genrand_real1(d); } } - free_mtdata(d); for (i = 0; i < 3; i++) { diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index 068a33d8c..db6dbedaf 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -127,6 +127,7 @@ int verify_smoothstep(const T *const edge0, const T *const edge1, } } + // accuracy of smoothstep for cl_khr_fp16 is implementation defined if (std::is_same::value) log_error("smoothstep half verification result, max delta: %a\n", max_delta); diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index 8325c6a4d..91b5c215b 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -231,7 +231,7 @@ int test_unary_fn(cl_device_id device, cl_context context, for (int j = 0; j < num_elements; j++) { input_ptr[j] = conv_to_half(get_random_float( - (float)(-65503.f * M_PI), (float)(65503.f * M_PI), d)); + (float)(-10000.f * M_PI), (float)(10000.f * M_PI), d)); } } From 2c25254391eb4cf2a87aa27bc8bddec3f445aba1 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Wed, 14 Jun 2023 15:45:23 +0200 Subject: [PATCH 09/11] Modified mix verification procedure for half type to only report max error --- test_conformance/commonfns/test_mix.cpp | 77 ++++++++++++------- .../commonfns/test_smoothstep.cpp | 4 +- 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index b90778ac7..b0ffeb0af 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -64,7 +64,7 @@ int verify_mix(const T *const inptrX, const T *const inptrY, const int veclen, const bool vecParam) { double r, o; - float delta = 0.0f; + float delta = 0.f, max_delta = 0.f; int i; if (vecParam) @@ -77,19 +77,27 @@ int verify_mix(const T *const inptrX, const T *const inptrY, o = conv_to_dbl(outptr[i]); delta = fabs(double(r - o)) / r; - if (delta > MAX_ERR) + if (!std::is_same::value) { - if (std::is_same::value) - log_error("%d) verification error: mix(%a, %a, %a) = *%a " - "vs. %a\n", - i, conv_to_flt(inptrX[i]), conv_to_flt(inptrY[i]), - conv_to_flt(inptrA[i]), r, - conv_to_flt(outptr[i])); - else - log_error("%d) verification error: mix(%a, %a, %a) = *%a " - "vs. %a\n", - i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); - return -1; + if (delta > MAX_ERR) + { + if (std::is_same::value) + log_error( + "%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, conv_to_flt(inptrX[i]), conv_to_flt(inptrY[i]), + conv_to_flt(inptrA[i]), r, conv_to_flt(outptr[i])); + else + log_error( + "%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); + return -1; + } + } + else + { + max_delta = std::max(max_delta, delta); } } } @@ -105,27 +113,40 @@ int verify_mix(const T *const inptrX, const T *const inptrY, + ((conv_to_dbl(inptrY[vi]) - conv_to_dbl(inptrX[vi])) * conv_to_dbl(inptrA[i])); delta = fabs(double(r - conv_to_dbl(outptr[vi]))) / r; - if (delta > MAX_ERR) + if (!std::is_same::value) { - if (std::is_same::value) - log_error( - "{%d, element %d}) verification error: mix(%a, " - "%a, %a) = *%a vs. %a\n", - ii, j, conv_to_flt(inptrX[vi]), - conv_to_flt(inptrY[vi]), conv_to_flt(inptrA[i]), r, - conv_to_flt(outptr[vi])); - else - log_error( - "{%d, element %d}) verification error: mix(%a, " - "%a, %a) = *%a vs. %a\n", - ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, - outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + if (std::is_same::value) + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, conv_to_flt(inptrX[vi]), + conv_to_flt(inptrY[vi]), conv_to_flt(inptrA[i]), + r, conv_to_flt(outptr[vi])); + else + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, + outptr[vi]); + return -1; + } + } + else + { + max_delta = std::max(max_delta, delta); } } } } + // due to the fact that accuracy of mix for cl_khr_fp16 is implementation + // defined this test only reports maximum error without testing maximum + // error threshold + if (std::is_same::value) + log_error("mix half verification result, max delta: %a\n", max_delta); + return 0; } } // namespace diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index db6dbedaf..5afc2d0f2 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -127,7 +127,9 @@ int verify_smoothstep(const T *const edge0, const T *const edge1, } } - // accuracy of smoothstep for cl_khr_fp16 is implementation defined + // due to the fact that accuracy of smoothstep for cl_khr_fp16 is + // implementation defined this test only reports maximum error without + // testing maximum error threshold if (std::is_same::value) log_error("smoothstep half verification result, max delta: %a\n", max_delta); From 84f189aec06b7fcd8856478042df77b162ee219b Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Wed, 14 Jun 2023 15:59:44 +0200 Subject: [PATCH 10/11] Removed redundant condition for logging mix verification --- test_conformance/commonfns/test_mix.cpp | 32 +++++++------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index b0ffeb0af..2a06e43df 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -81,17 +81,9 @@ int verify_mix(const T *const inptrX, const T *const inptrY, { if (delta > MAX_ERR) { - if (std::is_same::value) - log_error( - "%d) verification error: mix(%a, %a, %a) = *%a " - "vs. %a\n", - i, conv_to_flt(inptrX[i]), conv_to_flt(inptrY[i]), - conv_to_flt(inptrA[i]), r, conv_to_flt(outptr[i])); - else - log_error( - "%d) verification error: mix(%a, %a, %a) = *%a " - "vs. %a\n", - i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); + log_error("%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); return -1; } } @@ -117,19 +109,11 @@ int verify_mix(const T *const inptrX, const T *const inptrY, { if (delta > MAX_ERR) { - if (std::is_same::value) - log_error( - "{%d, element %d}) verification error: mix(%a, " - "%a, %a) = *%a vs. %a\n", - ii, j, conv_to_flt(inptrX[vi]), - conv_to_flt(inptrY[vi]), conv_to_flt(inptrA[i]), - r, conv_to_flt(outptr[vi])); - else - log_error( - "{%d, element %d}) verification error: mix(%a, " - "%a, %a) = *%a vs. %a\n", - ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, - outptr[vi]); + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, + outptr[vi]); return -1; } } From 9d0a4025ccbe903e2e4f0e7d1c0c0a24a8af2e0c Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 14:52:49 +0200 Subject: [PATCH 11/11] Corrected generator limits for half tests --- test_conformance/commonfns/test_binary_fn.cpp | 2 +- test_conformance/commonfns/test_clamp.cpp | 2 +- test_conformance/commonfns/test_step.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index 6330a04d3..a6c75647d 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -107,7 +107,7 @@ int test_binary_fn(cl_device_id device, cl_context context, } else if (std::is_same::value) { - const float fval = 0x20000000; + const float fval = CL_HALF_MAX; pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; for (int j = 0; j < num_elements; j++) { diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp index 83e98603b..1bf406770 100644 --- a/test_conformance/commonfns/test_clamp.cpp +++ b/test_conformance/commonfns/test_clamp.cpp @@ -201,7 +201,7 @@ int test_clamp_fn(cl_device_id device, cl_context context, } else if (std::is_same::value) { - const float fval = 0x200000; + const float fval = CL_HALF_MAX; for (j = 0; j < num_elements; j++) { input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index cea3b6588..1cfa96eab 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -149,7 +149,7 @@ int test_step_fn(cl_device_id device, cl_context context, } else if (std::is_same::value) { - const float fval = 0x40000000; + const float fval = CL_HALF_MAX; pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; for (i = 0; i < num_elements; i++) {