Skip to content

Commit

Permalink
[SYCL] Add __imf_rcp64h to intel math libdevice (#11610)
Browse files Browse the repository at this point in the history
Some deep learning framework uses '__nv_rcp64h' in CUDA backend. We need
to provide equivalent functionality in DPC++ compiler.

---------

Signed-off-by: jinge90 <ge.jin@intel.com>
  • Loading branch information
jinge90 committed Feb 27, 2024
1 parent e72b85c commit ce70cb5
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 0 deletions.
17 changes: 17 additions & 0 deletions libdevice/imf/imf_fp64_dl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,21 @@ DEVICE_EXTERN_C_INLINE double __devicelib_imf_fmin(double a, double b) {
return __fmin(a, b);
}

DEVICE_EXTERN_C_INLINE double __devicelib_imf_rcp64h(double x) {
uint64_t x_bits = __builtin_bit_cast(uint64_t, x);
uint32_t x_exp = (x_bits & 0x7FFF'FFFF'FFFF'FFFF) >> 52;
uint64_t x_mant = (x_bits & 0x000F'FFFF'FFFF'FFFF);
if (x_exp == 0)
x = ((x_bits >> 63) ? -0.0 : 0.0);
else if ((x_exp == 0x7FF) && (x_mant != 0)) {
x_bits = x_bits & 0x7FFF'FFFF'FFFF'FFFF;
return __builtin_bit_cast(double, x_bits);
}
uint64_t temp1 = __builtin_bit_cast(uint64_t, 1.0 / x);
if (((temp1 & 0x7FFF'FFFF'FFFF'FFFF) >> 52) == 0)
return ((temp1 >> 63) ? -0.0 : 0.0);
temp1 &= 0xFFFF'FFFF'0000'0000;
return __builtin_bit_cast(double, temp1);
}

#endif /*__LIBDEVICE_IMF_ENABLED__*/
6 changes: 6 additions & 0 deletions libdevice/imf_wrapper_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,12 @@ double __imf_copysign(double x, double y) {
return __devicelib_imf_copysign(x, y);
}

DEVICE_EXTERN_C_INLINE
double __devicelib_imf_rcp64h(double);

DEVICE_EXTERN_C_INLINE
double __imf_rcp64h(double x) { return __devicelib_imf_rcp64h(x); }

DEVICE_EXTERN_C_INLINE
_iml_half_internal __devicelib_imf_double2half(double);

Expand Down
1 change: 1 addition & 0 deletions llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ SYCLDeviceLibFuncMap SDLMap = {
{"__devicelib_imf_fabs", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_trunc", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_rint", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_rcp64h", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_nearbyint", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_inv", DeviceLibExt::cl_intel_devicelib_imf_fp64},
{"__devicelib_imf_sqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64},
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ extern __DPCPP_SYCL_EXTERNAL double __imf_floor(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_ceil(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_trunc(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_rint(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_rcp64h(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_nearbyint(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_sqrt(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_rsqrt(double x);
Expand Down
6 changes: 6 additions & 0 deletions sycl/include/sycl/ext/intel/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ _iml_half_internal __imf_rsqrtf16(_iml_half_internal);
float __imf_truncf(float);
double __imf_trunc(double);
_iml_half_internal __imf_truncf16(_iml_half_internal);
double __imf_rcp64h(double);
};

namespace sycl {
Expand Down Expand Up @@ -233,6 +234,11 @@ std::enable_if_t<std::is_same_v<Tp, sycl::half2>, sycl::half2> trunc(Tp x) {
return sycl::half2{trunc(x.s0()), trunc(x.s1())};
}

template <typename Tp>
std::enable_if_t<std::is_same_v<Tp, double>, double> rcp64h(Tp x) {
return __imf_rcp64h(x);
}

} // namespace ext::intel::math
} // namespace _V1
} // namespace sycl
23 changes: 23 additions & 0 deletions sycl/test-e2e/DeviceLib/imf_fp64_test2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// REQUIRES: aspect-fp64
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// RUN: %{build} -fno-builtin -fsycl-device-lib-jit-link -o %t.out
// RUN: %{run} %t.out
//
// UNSUPPORTED: cuda || hip
// end INTEL_CUSTOMIZATION

#include "imf_utils.hpp"
#include <sycl/ext/intel/math.hpp>

int main(int, char **) {
sycl::queue device_queue(sycl::default_selector_v);
std::initializer_list<double> input_vals = {3.0, -7.0 / 2.0, 14.0 / 15.0};
std::initializer_list<unsigned long long> ref_vals = {
0x3fd5555500000000, 0xbfd2492400000000, 0x3ff1249200000000};

test(device_queue, input_vals, ref_vals,
FT(unsigned long long, sycl::ext::intel::math::rcp64h));
std::cout << "sycl::ext::intel::math::rcp64h passes." << std::endl;
}

0 comments on commit ce70cb5

Please sign in to comment.