Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CHIP_ERROR_ON_FAILING_DEVICE_MATH #900

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ option(CHIP_USE_INTEL_USM "When enabled, cl_intel_unified_shared_memory extensio
option(CHIP_BUILD_HIPBLAS "When enabled, hipBLAS will be built and installed along chipStar" OFF)
option(CATCH2_DISCOVER_TESTS_COMPILE_TIME "Discover the tests at compile time" ON)
option(CHIP_SKIP_TESTS_WITH_DOUBLES "Skip tests where kernels use doubles." OFF)
option(CHIP_ERROR_ON_FAILING_DEVICE_MATH "Error on failing device math tests" ON)

# This mitigation might be necessary on some systems with an older runtime.
# This mitigation makes memory resident (disable swapping) on the GPU
Expand Down Expand Up @@ -819,4 +820,4 @@ endif()
include(cmake/docker.cmake)

add_subdirectory(host_math_funcs)
target_link_libraries(CHIP PUBLIC ocml_host_math_funcs)
target_link_libraries(CHIP PUBLIC ocml_host_math_funcs)
2 changes: 2 additions & 0 deletions chipStarConfig.hh.in
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@

#cmakedefine CHIP_FAST_MATH

#cmakedefine CHIP_ERROR_ON_FAILING_DEVICE_MATH

#cmakedefine CHIP_ERROR_IF_NOT_IMPLEMENTED

#cmakedefine CHIP_DEFAULT_JIT_FLAGS "@CHIP_DEFAULT_JIT_FLAGS@"
Expand Down
31 changes: 31 additions & 0 deletions include/hip/devicelib/double_precision/dp_math.hh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@

#include <hip/devicelib/macros.hh>

#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
// Temporarily include this header to abort on failing math functions
#if defined(__clang__) && defined(__HIP__)
extern "C" {
// A global flag included in all HIP device modules for signaling
// abort request.
extern __attribute__((weak)) __device__ int32_t __chipspv_abort_called;
extern __device__ void __chipspv_abort(int32_t *abort_flag);
}
extern "C" __device__ int printf(const char *fmt, ...)
__attribute__((format(printf, 1, 2)));
#endif
#endif

#if defined __has_builtin && __has_builtin(__builtin_acos)
// Must use 'static' here for the HIP built-ins mapped to compiler
// built-ins where the HIP built-ins' signature coincides with OpenCL
Expand Down Expand Up @@ -126,6 +140,9 @@ extern "C++" __device__ double erfc(double x);

extern "C" __device__ double __ocml_erfcinv_f64(double x); // OCML
extern "C++" inline __device__ double erfcinv(double x) {
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
printf("Error: erfcinv is known to give bad results\n");
#endif
return ::__ocml_erfcinv_f64(x);
}

Expand Down Expand Up @@ -349,6 +366,9 @@ extern "C++" inline __device__ double normcdf(double x) {

extern "C" __device__ double __ocml_ncdfinv_f64(double x); // OCML
extern "C++" inline __device__ double normcdfinv(double x) {
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
printf("Error: normcdfinv is known to give bad results\n");
#endif
return ::__ocml_ncdfinv_f64(x);
}

Expand All @@ -362,6 +382,9 @@ extern "C++" __device__ double pow(double x, double y); // OpenCL

extern "C" __device__ double __ocml_rcbrt_f64(double x); // OCML
extern "C++" inline __device__ double rcbrt(double x) {
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
printf("Error: rcbrt is known to give bad results\n");
#endif
return ::__ocml_rcbrt_f64(x);
}

Expand Down Expand Up @@ -408,9 +431,17 @@ extern "C++" inline __device__ double rsqrt(double x) {
return ::native_rsqrt(x);
}
#else
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
inline __device__ double rsqrt(double x) {// OpenCL
printf("Error: rsqrt is known to give bad results\n");
return 0;
}
#else
extern "C++" __device__ double rsqrt(double x); // OpenCL
#endif

#endif

extern "C" __device__ double __ocml_scalb_f64(double x, double n);
extern "C++" inline __device__ double scalbln(double x, long int n) {
// No implementatin for scalbln(double, long) in OCML so promote 'n'
Expand Down
39 changes: 36 additions & 3 deletions include/hip/devicelib/single_precision/sp_math.hh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@

#include <hip/devicelib/macros.hh>

#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
// Temporarily include this header to abort on failing math functions
#if defined(__clang__) && defined(__HIP__)
extern "C" {
// A global flag included in all HIP device modules for signaling
// abort request.
extern __attribute__((weak)) __device__ int32_t __chipspv_abort_called;
extern __device__ void __chipspv_abort(int32_t *abort_flag);
}
extern "C" __device__ int printf(const char *fmt, ...)
__attribute__((format(printf, 1, 2)));
#endif
#endif

/**
* @brief Declare as extern - we state that these funcitons are implemented and
* will be found at link time
Expand Down Expand Up @@ -129,7 +143,11 @@ extern "C++" inline __device__ float erfcf(float x) { return ::erfc(x); }
#endif

extern "C" __device__ float __ocml_erfcinv_f32(float x); // OCML
extern "C++" inline __device__ float erfcinvf(float x) { return ::__ocml_erfcinv_f32(x); }
extern "C++" inline __device__ float erfcinvf(float x) {
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
printf("Error: erfcinvf is known to give bad results\n");
#endif
return ::__ocml_erfcinv_f32(x); }

extern "C" __device__ float __ocml_erfcx_f32(float x); // OCML
extern "C++" inline __device__ float erfcxf(float x) { return ::__ocml_erfcx_f32(x); }
Expand All @@ -138,7 +156,11 @@ extern "C++" __device__ float erf(float x); // OpenCL
extern "C++" inline __device__ float erff(float x) { return ::erf(x); }

extern "C" __device__ float __ocml_erfinv_f32(float x); // OCML
extern "C++" inline __device__ float erfinvf(float x) { return ::__ocml_erfinv_f32(x); }
extern "C++" inline __device__ float erfinvf(float x) {
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
printf("Error: erfinvf is known to give bad results\n");
#endif
return ::__ocml_erfinv_f32(x); }

extern "C++" __device__ float exp10(float x); // OpenCL
extern "C++" __device__ float native_exp10(float x); // OpenCL
Expand Down Expand Up @@ -424,6 +446,9 @@ extern "C++" inline __device__ float normcdff(float x) { return ::__ocml_ncdf_f3

extern "C" __device__ float __ocml_ncdfinv_f32(float x); // OCML
extern "C++" inline __device__ float normcdfinvf(float x) {
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
printf("Error: normcdfinvf is known to give bad results\n");
#endif
return ::__ocml_ncdfinv_f32(x);
}

Expand All @@ -449,7 +474,12 @@ extern "C++" inline __device__ float powf(float x, float y) {
#endif

extern "C" __device__ float __ocml_rcbrt_f32(float x); // OCML
extern "C++" inline __device__ float rcbrtf(float x) { return ::__ocml_rcbrt_f32(x); }
extern "C++" inline __device__ float rcbrtf(float x) {
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
printf("Error: rcbrtf is known to give bad results\n");
#endif
return ::__ocml_rcbrt_f32(x);
}

extern "C++" __device__ float remainder(float x, float y); // OpenCL
extern "C++" inline __device__ float remainderf(float x, float y) {
Expand Down Expand Up @@ -508,6 +538,9 @@ extern "C++" inline __device__ float roundf(float x) {
extern "C++" __device__ float rsqrt(float x); // OpenCL
extern "C++" __device__ float native_rsqrt(float x); // OpenCL
extern "C++" inline __device__ float rsqrtf(float x) {
#ifdef CHIP_ERROR_ON_FAILING_DEVICE_MATH
printf("Error: rsqrtf is known to give bad results\n");
#endif
#ifdef __FAST_MATH__
return ::native_rsqrt(x);
#else
Expand Down
Loading