diff --git a/libdevice/imf/imf_fp64_dl.cpp b/libdevice/imf/imf_fp64_dl.cpp index 5ba57320aec97..37fbd906f71eb 100644 --- a/libdevice/imf/imf_fp64_dl.cpp +++ b/libdevice/imf/imf_fp64_dl.cpp @@ -26,4 +26,21 @@ DEVICE_EXTERN_C_INLINE double __devicelib_imf_fmin(double a, double b) { return __fmin(a, b); } +DEVICE_EXTERN_C_INLINE double __devicelib_imf_rcp64h(double x) { + uint64_t x_bits = __builtin_bit_cast(uint64_t, x); + uint32_t x_exp = (x_bits & 0x7FFF'FFFF'FFFF'FFFF) >> 52; + uint64_t x_mant = (x_bits & 0x000F'FFFF'FFFF'FFFF); + if (x_exp == 0) + x = ((x_bits >> 63) ? -0.0 : 0.0); + else if ((x_exp == 0x7FF) && (x_mant != 0)) { + x_bits = x_bits & 0x7FFF'FFFF'FFFF'FFFF; + return __builtin_bit_cast(double, x_bits); + } + uint64_t temp1 = __builtin_bit_cast(uint64_t, 1.0 / x); + if (((temp1 & 0x7FFF'FFFF'FFFF'FFFF) >> 52) == 0) + return ((temp1 >> 63) ? -0.0 : 0.0); + temp1 &= 0xFFFF'FFFF'0000'0000; + return __builtin_bit_cast(double, temp1); +} + #endif /*__LIBDEVICE_IMF_ENABLED__*/ diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp index 7fa60f0011468..10cf98e844774 100644 --- a/libdevice/imf_wrapper_fp64.cpp +++ b/libdevice/imf_wrapper_fp64.cpp @@ -362,6 +362,12 @@ double __imf_copysign(double x, double y) { return __devicelib_imf_copysign(x, y); } +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_rcp64h(double); + +DEVICE_EXTERN_C_INLINE +double __imf_rcp64h(double x) { return __devicelib_imf_rcp64h(x); } + DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_double2half(double); diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index 9c52b8b524dd8..8f8dd8267c771 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -445,6 +445,7 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_imf_fabs", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_trunc", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_rint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_rcp64h", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_nearbyint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_inv", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_sqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 87a4e84122d83..5af657bdf2322 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -335,6 +335,7 @@ extern __DPCPP_SYCL_EXTERNAL double __imf_floor(double x); extern __DPCPP_SYCL_EXTERNAL double __imf_ceil(double x); extern __DPCPP_SYCL_EXTERNAL double __imf_trunc(double x); extern __DPCPP_SYCL_EXTERNAL double __imf_rint(double x); +extern __DPCPP_SYCL_EXTERNAL double __imf_rcp64h(double x); extern __DPCPP_SYCL_EXTERNAL double __imf_nearbyint(double x); extern __DPCPP_SYCL_EXTERNAL double __imf_sqrt(double x); extern __DPCPP_SYCL_EXTERNAL double __imf_rsqrt(double x); diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp index 372362f894cc6..1b3be573d0a16 100644 --- a/sycl/include/sycl/ext/intel/math.hpp +++ b/sycl/include/sycl/ext/intel/math.hpp @@ -54,6 +54,7 @@ _iml_half_internal __imf_rsqrtf16(_iml_half_internal); float __imf_truncf(float); double __imf_trunc(double); _iml_half_internal __imf_truncf16(_iml_half_internal); +double __imf_rcp64h(double); }; namespace sycl { @@ -233,6 +234,11 @@ std::enable_if_t, sycl::half2> trunc(Tp x) { return sycl::half2{trunc(x.s0()), trunc(x.s1())}; } +template +std::enable_if_t, double> rcp64h(Tp x) { + return __imf_rcp64h(x); +} + } // namespace ext::intel::math } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/DeviceLib/imf_fp64_test2.cpp b/sycl/test-e2e/DeviceLib/imf_fp64_test2.cpp new file mode 100644 index 0000000000000..e16d2efa7c874 --- /dev/null +++ b/sycl/test-e2e/DeviceLib/imf_fp64_test2.cpp @@ -0,0 +1,23 @@ +// REQUIRES: aspect-fp64 +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// RUN: %{build} -fno-builtin -fsycl-device-lib-jit-link -o %t.out +// RUN: %{run} %t.out +// +// UNSUPPORTED: cuda || hip +// end INTEL_CUSTOMIZATION + +#include "imf_utils.hpp" +#include + +int main(int, char **) { + sycl::queue device_queue(sycl::default_selector_v); + std::initializer_list input_vals = {3.0, -7.0 / 2.0, 14.0 / 15.0}; + std::initializer_list ref_vals = { + 0x3fd5555500000000, 0xbfd2492400000000, 0x3ff1249200000000}; + + test(device_queue, input_vals, ref_vals, + FT(unsigned long long, sycl::ext::intel::math::rcp64h)); + std::cout << "sycl::ext::intel::math::rcp64h passes." << std::endl; +}