Skip to content

Commit

Permalink
Add mapping to OpenCL native_* on -ffast-math
Browse files Browse the repository at this point in the history
Map HIP device builtin functions to corresponding OpenCL native
built-ins on -ffast-math. The rationale is based on `-ffast-math`
specification [1] which gives permission to approximate transcendental
functions (-fapprox-func).

[1] https://clang.llvm.org/docs/UsersManual.html#cmdoption-ffast-math
  • Loading branch information
linehill authored and pvelesko committed Dec 12, 2023
1 parent 6f602e7 commit f513b20
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 19 deletions.
91 changes: 87 additions & 4 deletions include/hip/devicelib/double_precision/dp_math.hh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,14 @@ extern "C++" __device__ double atanh(double x); // OpenCL
extern "C++" __device__ double cbrt(double x); // OpenCL
extern "C++" __device__ double ceil(double x); // OpenCL
extern "C++" __device__ double copysign(double x, double y); // OpenCL

#ifdef __FAST_MATH__
extern "C++" __device__ double native_cos(double x); // OpenCL
extern "C++" inline __device__ double cos(double x) { return ::native_cos(x); }
#else
extern "C++" __device__ double cos(double x); // OpenCL
#endif

extern "C++" __device__ double cosh(double x); // OpenCL
extern "C++" __device__ double cospi(double x); // OpenCL

Expand Down Expand Up @@ -69,9 +76,31 @@ extern "C++" inline __device__ double erfinv(double x) {
return ::__ocml_erfinv_f64(x);
}

#ifdef __FAST_MATH__
extern "C++" __device__ double native_exp(double x); // OpenCL
extern "C++" inline __device__ double exp(double x) { return ::native_exp(x); }
#else
extern "C++" __device__ double exp(double x); // OpenCL
#endif

#ifdef __FAST_MATH__
extern "C++" __device__ double native_exp10(double x); // OpenCL
extern "C++" inline __device__ double exp10(double x) {
return ::native_exp10(x);
}
#else
extern "C++" __device__ double exp10(double x); // OpenCL
extern "C++" __device__ double exp2(double x); // OpenCL
#endif

#ifdef __FAST_MATH__
extern "C++" __device__ double native_exp2(double x); // OpenCL
extern "C++" inline __device__ double exp2(double x) {
return ::native_exp2(x);
}
#else
extern "C++" __device__ double exp2(double x); // OpenCL
#endif

extern "C++" __device__ double expm1(double x); // OpenCL
extern "C++" __device__ double fabs(double x); // OpenCL
extern "C++" __device__ double fdim(double x, double y); // OpenCL
Expand Down Expand Up @@ -116,10 +145,33 @@ extern "C++" inline __device__ long long int llround(double x) {
return ::__chip_llround_f64(x);
}

extern "C++" __device__ double log(double x); // OpenCL
#ifdef __FAST_MATH__
extern "C++" __device__ double native_log(double x); // OpenCL
extern "C++" inline __device__ double log(double x) { return ::native_log(x); }
#else
extern "C++" __device__ double log(double x); // OpenCL
#endif

#ifdef __FAST_MATH__
extern "C++" __device__ double native_log10(double x); // OpenCL
extern "C++" inline __device__ double log10(double x) {
return ::native_log10(x);
}
#else
extern "C++" __device__ double log10(double x); // OpenCL
#endif

extern "C++" __device__ double log1p(double x); // OpenCL
extern "C++" __device__ double log2(double x); // OpenCL

#ifdef __FAST_MATH__
extern "C++" __device__ double native_log2(double x); // OpenCL
extern "C++" inline __device__ double log2(double x) {
return ::native_log2(x);
}
#else
extern "C++" __device__ double log2(double x); // OpenCL
#endif

extern "C++" __device__ double logb(double x); // OpenCL

extern "C" __device__ long int __chip_lrint_f64(double x); // Custom
Expand Down Expand Up @@ -214,7 +266,15 @@ extern "C++" inline __device__ double rnorm4d(double a, double b, double c,
}

extern "C++" __device__ double round(double x); // OpenCL

#ifdef __FAST_MATH__
extern "C++" __device__ double native_rsqrt(double x); // OpenCL
extern "C++" inline __device__ double rsqrt(double x) {
return ::native_rsqrt(x);
}
#else
extern "C++" __device__ double rsqrt(double x); // OpenCL
#endif

extern "C" __device__ double __ocml_scalb_f64(double x, double n);
extern "C++" inline __device__ double scalbln(double x, long int n) {
Expand All @@ -229,7 +289,15 @@ extern "C++" inline __device__ double scalbn(double x, int n) {
}

extern "C++" __device__ int signbit ( double a ); // OpenCL

#ifdef __FAST_MATH__
extern "C++" __device__ double native_sin(double x); // OpenCL
extern "C++" inline __device__ double sin(double x) {
return ::native_sin(x);
}
#else
extern "C++" __device__ double sin(double x); // OpenCL
#endif

extern "C++" __device__ double sincos(double x, double *sptr); // OpenCL
extern "C++" inline __device__ void sincos(double x, double *sptr,
Expand All @@ -248,8 +316,23 @@ extern "C++" inline __device__ void sincospi(double x, double *sptr,

extern "C++" __device__ double sinh(double x); // OpenCL
extern "C++" __device__ double sinpi(double x); // OpenCL

#ifdef __FAST_MATH__
extern "C++" __device__ double native_sqrt(double x); // OpenCL
extern "C++" inline __device__ double sqrt(double x) {
return ::native_sqrt(x);
}
#else
extern "C++" __device__ double sqrt(double x); // OpenCL
extern "C++" __device__ double tan(double x); // OpenCL
#endif

#ifdef __FAST_MATH__
extern "C++" __device__ double native_tan(double x); // OpenCL
extern "C++" inline __device__ double tan(double x) { return ::native_tan(x); }
#else
extern "C++" __device__ double tan(double x); // OpenCL
#endif

extern "C++" __device__ double tanh(double x); // OpenCL
extern "C++" __device__ double tgamma(double x); // OpenCL
extern "C++" __device__ double trunc(double x); // OpenCL
Expand Down
107 changes: 92 additions & 15 deletions include/hip/devicelib/single_precision/sp_math.hh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,14 @@ extern "C++" inline __device__ float copysignf(float x, float y) {
}

extern "C++" __device__ float cos(float x); // OpenCL
extern "C++" inline __device__ float cosf(float x) { return ::cos(x); }
extern "C++" __device__ float native_cos(float x); // OpenCL
extern "C++" inline __device__ float cosf(float x) {
#ifdef __FAST_MATH__
return ::native_cos(x);
#else
return ::cos(x);
#endif
}

extern "C++" __device__ float cosh(float x); // OpenCL
extern "C++" inline __device__ float coshf(float x) { return ::cosh(x); }
Expand Down Expand Up @@ -102,13 +109,34 @@ extern "C" __device__ float __ocml_erfinv_f32(float x); // OCML
extern "C++" inline __device__ float erfinvf(float x) { return ::__ocml_erfinv_f32(x); }

extern "C++" __device__ float exp10(float x); // OpenCL
extern "C++" inline __device__ float exp10f(float x) { return ::exp10(x); }
extern "C++" __device__ float native_exp10(float x); // OpenCL
extern "C++" inline __device__ float exp10f(float x) {
#ifdef __FAST_MATH__
return ::native_exp10(x);
#else
return ::exp10(x);
#endif
}

extern "C++" __device__ float exp2(float x); // OpenCL
extern "C++" inline __device__ float exp2f(float x) { return ::exp2(x); }
extern "C++" __device__ float native_exp2(float x); // OpenCL
extern "C++" inline __device__ float exp2f(float x) {
#ifdef __FAST_MATH__
return ::native_exp2(x);
#else
return ::exp2(x);
#endif
}

extern "C++" __device__ float exp(float x); // OpenCL
extern "C++" inline __device__ float expf(float x) { return ::exp(x); }
extern "C++" __device__ float native_exp(float x); // OpenCL
extern "C++" inline __device__ float expf(float x) {
#ifdef __FAST_MATH__
return ::native_exp(x);
#else
return ::exp(x);
#endif
}

extern "C++" __device__ float expm1(float x); // OpenCL
extern "C++" inline __device__ float expm1f(float x) { return ::expm1(x); }
Expand All @@ -121,12 +149,12 @@ extern "C++" inline __device__ float fdimf(float x, float y) {
return ::fdim(x, y);
}

// extern "C++" __device__ float native_divide(float x, float y); // OpenCL
extern "C++" __device__ float native_divide(float x, float y); // OpenCL
extern "C++" inline __device__ float fdividef(float x, float y) {
#ifdef CHIP_FAST_MATH // TODO check if this is correct
#ifdef __FAST_MATH__
return native_divide(x, y);
#else
return x / y;
return x / y;
#endif
}

Expand Down Expand Up @@ -206,19 +234,40 @@ extern "C++" inline __device__ long long int llroundf(float x) {
}

extern "C++" __device__ float log10(float x); // OpenCL
extern "C++" inline __device__ float log10f(float x) { return ::log10(x); }
extern "C++" __device__ float native_log10(float x); // OpenCL
extern "C++" inline __device__ float log10f(float x) {
#ifdef __FAST_MATH__
return ::native_log10(x);
#else
return ::log10(x);
#endif
}

extern "C++" __device__ float log1p(float x); // OpenCL
extern "C++" inline __device__ float log1pf(float x) { return ::log1p(x); }

extern "C++" __device__ float log2(float x); // OpenCL
extern "C++" inline __device__ float log2f(float x) { return ::log2(x); }
extern "C++" __device__ float native_log2(float x); // OpenCL
extern "C++" inline __device__ float log2f(float x) {
#ifdef __FAST_MATH__
return ::native_log2(x);
#else
return ::log2(x);
#endif
}

extern "C++" __device__ float logb(float x); // OpenCL
extern "C++" inline __device__ float logbf(float x) { return ::logb(x); }

extern "C++" __device__ float log(float x); // OpenCL
extern "C++" inline __device__ float logf(float x) { return ::log(x); }
extern "C++" __device__ float native_log(float x); // OpenCL
extern "C++" inline __device__ float logf(float x) {
#ifdef __FAST_MATH__
return ::native_log(x);
#else
return ::log(x);
#endif
}

extern "C" __device__ long int __chip_lrint_f32(float x); // Custom
extern "C++" inline __device__ long int lrintf(float x) {
Expand Down Expand Up @@ -356,8 +405,15 @@ extern "C++" inline __device__ float roundf(float x) {
return static_cast<float>(::round(x));
}

extern "C++" __device__ float rsqrt(float x); // OpenCL
extern "C++" inline __device__ float rsqrtf(float x) { return ::rsqrt(x); }
extern "C++" __device__ float rsqrt(float x); // OpenCL
extern "C++" __device__ float native_rsqrt(float x); // OpenCL
extern "C++" inline __device__ float rsqrtf(float x) {
#ifdef __FAST_MATH__
return ::native_rsqrt(x);
#else
return ::rsqrt(x);
#endif
}

extern "C" __device__ float __ocml_scalbn_f32(float x, int n); // OCML
extern "C++" inline __device__ float scalbnf(float x, int n) {
Expand Down Expand Up @@ -385,7 +441,14 @@ extern "C++" inline __device__ void sincospif(float x, float *sptr,
}

extern "C++" __device__ float sin(float x); // OpenCL
extern "C++" inline __device__ float sinf(float x) { return ::sin(x); }
extern "C++" __device__ float native_sin(float x); // OpenCL
extern "C++" inline __device__ float sinf(float x) {
#ifdef __FAST_MATH__
return ::native_sin(x);
#else
return ::sin(x);
#endif
}

extern "C++" __device__ float sinh(float x); // OpenCL
extern "C++" inline __device__ float sinhf(float x) { return ::sinh(x); }
Expand All @@ -394,10 +457,24 @@ extern "C++" __device__ float sinpi(float x); // OpenCL
extern "C++" inline __device__ float sinpif(float x) { return ::sinpi(x); }

extern "C++" __device__ float sqrt(float x); // OpenCL
extern "C++" inline __device__ float sqrtf(float x) { return ::sqrt(x); }
extern "C++" __device__ float native_sqrt(float x); // OpenCL
extern "C++" inline __device__ float sqrtf(float x) {
#ifdef __FAST_MATH__
return ::native_sqrt(x);
#else
return ::sqrt(x);
#endif
}

extern "C++" __device__ float tan(float x); // OpenCL
extern "C++" inline __device__ float tanf(float x) { return ::tan(x); }
extern "C++" __device__ float native_tan(float x); // OpenCL
extern "C++" inline __device__ float tanf(float x) {
#ifdef __FAST_MATH__
return ::native_tan(x);
#else
return ::tan(x);
#endif
}

extern "C++" __device__ float tanh(float x); // OpenCL
extern "C++" inline __device__ float tanhf(float x) { return ::tanh(x); }
Expand Down
4 changes: 4 additions & 0 deletions tests/compiler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,7 @@ add_hipcc_test(TestLdg.hip HIPCC_OPTIONS -fsyntax-only)
add_hipcc_test(TestSwitchCase.hip HIPCC_OPTIONS -O1 -c)
add_hipcc_test(TestHostSideHIPVectors.hip HIPCC_OPTIONS -fsyntax-only)
add_hipcc_test(TestAlignAttr.hip HIPCC_OPTIONS -fsyntax-only)

# Check __FAST_MATH__ is set for -ffast-math and preprocessor guards
# using it are not hiding errors.
add_hipcc_test(TestFastMath.hip HIPCC_OPTIONS -fsyntax-only -ffast-math)
4 changes: 4 additions & 0 deletions tests/compiler/TestFastMath.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include <hip/hip_runtime.h>
#ifndef __FAST_MATH__
#error "__FAST_MATH__ macro is not defined with -ffast-math"
#endif

0 comments on commit f513b20

Please sign in to comment.