From ca64b3e8e595c247e006345921a1234696a8ff2f Mon Sep 17 00:00:00 2001
From: Harald van Dijk <harald.vandijk@codeplay.com>
Date: Thu, 30 May 2024 14:44:28 +0100
Subject: [PATCH] Disable implicit conversion of copysign arguments.

In binary_float.cpp, copysign is special cased. All the reference
functions there take double arguments, except for reference_copysign
which takes float. This commit copies that approach to special case
copysign in binary_double.cpp as well: all the reference functions
there take long double arguments, but this commit changes
reference_copysignl to take double. The rationale for this in
binary_float applies equally to binary_double: conversions of NAN are
not required to preserve its sign bit. On architectures where conversion
of NAN resets the sign bit, copysign fp64 would return incorrect
reference results.
---
 .../math_brute_force/binary_double.cpp        | 25 +++++++++------
 .../math_brute_force/function_list.cpp        | 16 +++++++++-
 .../math_brute_force/function_list.h          |  1 +
 .../math_brute_force/reference_math.cpp       | 32 +++++++++----------
 .../math_brute_force/reference_math.h         |  4 +--
 5 files changed, 49 insertions(+), 29 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index cd47c76bb4..feeedc471d 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -219,6 +219,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_double *r;
     cl_double *s;
     cl_double *s2;
+    cl_int copysign_test = 0;
 
     Force64BitFPUPrecision();
 
@@ -377,12 +378,16 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     if (gSkipCorrectnessTesting) return CL_SUCCESS;
 
+    if (!strcmp(name, "copysign")) copysign_test = 1;
+
+#define ref_func(s, s2) (copysign_test ? func.f_ff_d(s, s2) : func.f_ff(s, s2))
+
     // Calculate the correctly rounded reference result
     r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
     s = (cl_double *)gIn + thread_id * buffer_elements;
     s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
     for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
+        r[j] = (cl_double)ref_func(s[j], s2[j]);
 
     // Read the data back -- no need to wait for the first N-1 buffers but wait
     // for the last buffer. This is an in order queue.
@@ -412,7 +417,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
+                long double correct = ref_func(s[j], s2[j]);
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
@@ -449,8 +454,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                         // retry per section 6.5.3.3
                         if (IsDoubleSubnormal(s[j]))
                         {
-                            long double correct2 = func.f_ff(0.0, s2[j]);
-                            long double correct3 = func.f_ff(-0.0, s2[j]);
+                            long double correct2 = ref_func(0.0, s2[j]);
+                            long double correct3 = ref_func(-0.0, s2[j]);
                             float err2 =
                                 Bruteforce_Ulp_Error_Double(test, correct2);
                             float err3 =
@@ -472,10 +477,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                             // try with both args as zero
                             if (IsDoubleSubnormal(s2[j]))
                             {
-                                correct2 = func.f_ff(0.0, 0.0);
-                                correct3 = func.f_ff(-0.0, 0.0);
-                                long double correct4 = func.f_ff(0.0, -0.0);
-                                long double correct5 = func.f_ff(-0.0, -0.0);
+                                correct2 = ref_func(0.0, 0.0);
+                                correct3 = ref_func(-0.0, 0.0);
+                                long double correct4 = ref_func(0.0, -0.0);
+                                long double correct5 = ref_func(-0.0, -0.0);
                                 err2 =
                                     Bruteforce_Ulp_Error_Double(test, correct2);
                                 err3 =
@@ -507,8 +512,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                         }
                         else if (IsDoubleSubnormal(s2[j]))
                         {
-                            long double correct2 = func.f_ff(s[j], 0.0);
-                            long double correct3 = func.f_ff(s[j], -0.0);
+                            long double correct2 = ref_func(s[j], 0.0);
+                            long double correct3 = ref_func(s[j], -0.0);
                             float err2 =
                                 Bruteforce_Ulp_Error_Double(test, correct2);
                             float err3 =
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index 917362852c..3fe67e6869 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -68,6 +68,8 @@
 #define binaryF_two_results_i NULL
 #define mad_function NULL
 
+#define reference_copysignf NULL
+#define reference_copysign NULL
 #define reference_sqrt NULL
 #define reference_sqrtl NULL
 #define reference_divide NULL
@@ -212,7 +214,19 @@ const Func functionList[] = {
     ENTRY(atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF),
     ENTRY(cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
     ENTRY(ceil, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(copysign, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    { "copysign",
+      "copysign",
+      { (void*)reference_copysignf },
+      { (void*)reference_copysign },
+      { (void*)reference_copysignf },
+      0.0f,
+      0.0f,
+      0.0f,
+      INFINITY,
+      INFINITY,
+      FTZ_OFF,
+      RELAXED_OFF,
+      binaryF },
     ENTRY_EXT(cos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
     ENTRY(cosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
index 95a2945932..32e3c825e7 100644
--- a/test_conformance/math_brute_force/function_list.h
+++ b/test_conformance/math_brute_force/function_list.h
@@ -52,6 +52,7 @@ union dptr {
     long double (*f_f)(long double);
     long double (*f_u)(cl_ulong);
     int (*i_f)(long double);
+    double (*f_ff_d)(double, double);
     long double (*f_ff)(long double, long double);
     int (*i_ff)(long double, long double);
     long double (*f_fi)(long double, int);
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index afa072f8e0..0c69a234f7 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -691,7 +691,7 @@ double reference_rootn(double x, int i)
     double sign = x;
     x = reference_fabs(x);
     x = reference_exp2(reference_log2(x) / (double)i);
-    return reference_copysignd(x, sign);
+    return reference_copysign(x, sign);
 }
 
 double reference_rsqrt(double x) { return 1.0 / reference_sqrt(x); }
@@ -707,7 +707,7 @@ double reference_sinpi(double x)
         r = 1 - r;
 
     // sinPi zeros have the same sign as x
-    if (r == 0.0) return reference_copysignd(0.0, x);
+    if (r == 0.0) return reference_copysign(0.0, x);
 
     return reference_sin(r * M_PI);
 }
@@ -717,7 +717,7 @@ double reference_relaxed_sinpi(double x) { return reference_sinpi(x); }
 double reference_tanpi(double x)
 {
     // set aside the sign  (allows us to preserve sign of -0)
-    double sign = reference_copysignd(1.0, x);
+    double sign = reference_copysign(1.0, x);
     double z = reference_fabs(x);
 
     // if big and even  -- caution: only works if x only has single precision
@@ -725,7 +725,7 @@ double reference_tanpi(double x)
     {
         if (z == INFINITY) return x - x; // nan
 
-        return reference_copysignd(
+        return reference_copysign(
             0.0, x); // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
     }
 
@@ -739,7 +739,7 @@ double reference_tanpi(double x)
     if ((i & 1) && z == 0.0) sign = -sign;
 
     // track changes to the sign
-    sign *= reference_copysignd(1.0, z); // really should just be an xor
+    sign *= reference_copysign(1.0, z); // really should just be an xor
     z = reference_fabs(z); // remove the sign again
 
     // reduce once more
@@ -1070,7 +1070,7 @@ int reference_signbit(float x) { return 0 != signbit(x); }
 // Missing functions for win32
 
 
-float reference_copysign(float x, float y)
+float reference_copysignf(float x, float y)
 {
     union {
         float f;
@@ -1084,7 +1084,7 @@ float reference_copysign(float x, float y)
 }
 
 
-double reference_copysignd(double x, double y)
+double reference_copysign(double x, double y)
 {
     union {
         double f;
@@ -1101,10 +1101,10 @@ double reference_copysignd(double x, double y)
 double reference_round(double x)
 {
     double absx = reference_fabs(x);
-    if (absx < 0.5) return reference_copysignd(0.0, x);
+    if (absx < 0.5) return reference_copysign(0.0, x);
 
     if (absx < HEX_DBL(+, 1, 0, +, 53))
-        x = reference_trunc(x + reference_copysignd(0.5, x));
+        x = reference_trunc(x + reference_copysign(0.5, x));
 
     return x;
 }
@@ -1115,7 +1115,7 @@ double reference_trunc(double x)
     {
         cl_long l = (cl_long)x;
 
-        return reference_copysignd((double)l, x);
+        return reference_copysign((double)l, x);
     }
 
     return x;
@@ -1132,16 +1132,16 @@ double reference_trunc(double x)
 
 double reference_cbrt(double x)
 {
-    return reference_copysignd(reference_pow(reference_fabs(x), 1.0 / 3.0), x);
+    return reference_copysign(reference_pow(reference_fabs(x), 1.0 / 3.0), x);
 }
 
 double reference_rint(double x)
 {
     if (reference_fabs(x) < HEX_DBL(+, 1, 0, +, 52))
     {
-        double magic = reference_copysignd(HEX_DBL(+, 1, 0, +, 52), x);
+        double magic = reference_copysign(HEX_DBL(+, 1, 0, +, 52), x);
         double rounded = (x + magic) - magic;
-        x = reference_copysignd(rounded, x);
+        x = reference_copysign(rounded, x);
     }
 
     return x;
@@ -1174,7 +1174,7 @@ double reference_asinh(double x)
     double absx = reference_fabs(x);
     if (absx < HEX_DBL(+, 1, 0, -, 28)) return x;
 
-    double sign = reference_copysignd(1.0, x);
+    double sign = reference_copysign(1.0, x);
 
     if (absx > HEX_DBL(+, 1, 0, +, 28))
         return sign
@@ -1206,7 +1206,7 @@ double reference_atanh(double x)
      */
     if (isnan(x)) return x + x;
 
-    double signed_half = reference_copysignd(0.5, x);
+    double signed_half = reference_copysign(0.5, x);
     x = reference_fabs(x);
     if (x > 1.0) return cl_make_nan();
 
@@ -5290,7 +5290,7 @@ double reference_pow(double x, double y)
     __log2_ep(&hi, &lo, fabsx);
     double prod = y * hi;
     double result = reference_exp2(prod);
-    return isOddInt ? reference_copysignd(result, x) : result;
+    return isOddInt ? reference_copysign(result, x) : result;
 }
 
 double reference_sqrt(double x) { return sqrt(x); }
diff --git a/test_conformance/math_brute_force/reference_math.h b/test_conformance/math_brute_force/reference_math.h
index 78b245105e..fb7ad2d7b9 100644
--- a/test_conformance/math_brute_force/reference_math.h
+++ b/test_conformance/math_brute_force/reference_math.h
@@ -87,8 +87,8 @@ double reference_acosh(double x);
 double reference_asinh(double x);
 double reference_atanh(double x);
 double reference_cbrt(double x);
-float reference_copysign(float x, float y);
-double reference_copysignd(double x, double y);
+float reference_copysignf(float x, float y);
+double reference_copysign(double x, double y);
 double reference_exp10(double);
 double reference_exp2(double x);
 double reference_expm1(double x);