Skip to content

Commit

Permalink
[SYCL] Add rcp for fp32 and fp64 with rounding mode supported (#11768)
Browse files Browse the repository at this point in the history
This PR adds frcp_rd/n/u/z and drcp_rd/n/u/z to sycl::ext::intel::math
which corresponds to CUDA math's __frcp_r* and __drcp_r*
  • Loading branch information
jinge90 authored Nov 6, 2023
1 parent 69f4e16 commit 9a4719b
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 0 deletions.
12 changes: 12 additions & 0 deletions libdevice/imf_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1931,21 +1931,33 @@ float __devicelib_imf_fdiv_rd(float, float);
DEVICE_EXTERN_C_INLINE
float __imf_fdiv_rd(float x, float y) { return __devicelib_imf_fdiv_rd(x, y); }

DEVICE_EXTERN_C_INLINE
float __imf_frcp_rd(float x) { return __devicelib_imf_fdiv_rd(1.0f, x); }

DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fdiv_rn(float, float);

DEVICE_EXTERN_C_INLINE
float __imf_fdiv_rn(float x, float y) { return __devicelib_imf_fdiv_rn(x, y); }

DEVICE_EXTERN_C_INLINE
float __imf_frcp_rn(float x) { return __devicelib_imf_fdiv_rn(1.0f, x); }

DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fdiv_ru(float, float);

DEVICE_EXTERN_C_INLINE
float __imf_fdiv_ru(float x, float y) { return __devicelib_imf_fdiv_ru(x, y); }

DEVICE_EXTERN_C_INLINE
float __imf_frcp_ru(float x) { return __devicelib_imf_fdiv_ru(1.0f, x); }

DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fdiv_rz(float, float);

DEVICE_EXTERN_C_INLINE
float __imf_fdiv_rz(float x, float y) { return __devicelib_imf_fdiv_rz(x, y); }

DEVICE_EXTERN_C_INLINE
float __imf_frcp_rz(float x) { return __devicelib_imf_fdiv_rz(1.0f, x); }
#endif // __LIBDEVICE_IMF_ENABLED__
12 changes: 12 additions & 0 deletions libdevice/imf_wrapper_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,9 @@ double __imf_ddiv_rd(double x, double y) {
return __devicelib_imf_ddiv_rd(x, y);
}

DEVICE_EXTERN_C_INLINE
double __imf_drcp_rd(double x) { return __devicelib_imf_ddiv_rd(1.0, x); }

DEVICE_EXTERN_C_INLINE
double __devicelib_imf_ddiv_rn(double, double);

Expand All @@ -490,6 +493,9 @@ double __imf_ddiv_rn(double x, double y) {
return __devicelib_imf_ddiv_rn(x, y);
}

DEVICE_EXTERN_C_INLINE
double __imf_drcp_rn(double x) { return __devicelib_imf_ddiv_rn(1.0, x); }

DEVICE_EXTERN_C_INLINE
double __devicelib_imf_ddiv_ru(double, double);

Expand All @@ -498,11 +504,17 @@ double __imf_ddiv_ru(double x, double y) {
return __devicelib_imf_ddiv_ru(x, y);
}

DEVICE_EXTERN_C_INLINE
double __imf_drcp_ru(double x) { return __devicelib_imf_ddiv_ru(1.0, x); }

DEVICE_EXTERN_C_INLINE
double __devicelib_imf_ddiv_rz(double, double);

DEVICE_EXTERN_C_INLINE
double __imf_ddiv_rz(double x, double y) {
return __devicelib_imf_ddiv_rz(x, y);
}

DEVICE_EXTERN_C_INLINE
double __imf_drcp_rz(double x) { return __devicelib_imf_ddiv_rz(1.0, x); }
#endif // __LIBDEVICE_IMF_ENABLED__
8 changes: 8 additions & 0 deletions sycl/include/sycl/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rd(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rn(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_ru(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rz(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rd(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rn(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_ru(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_frcp_rz(float x);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float x);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float x);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float x);
Expand Down Expand Up @@ -336,6 +340,10 @@ extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rd(double x, double y);
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rn(double x, double y);
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_ru(double x, double y);
extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rz(double x, double y);
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rd(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rn(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_ru(double x);
extern __DPCPP_SYCL_EXTERNAL double __imf_drcp_rz(double x);
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rd(double x);
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rn(double x);
extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_ru(double x);
Expand Down
24 changes: 24 additions & 0 deletions sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ float __imf_fdiv_rz(float, float);
float __imf_fdiv_rn(float, float);
float __imf_fdiv_ru(float, float);
float __imf_fdiv_rd(float, float);
float __imf_frcp_rz(float);
float __imf_frcp_rn(float);
float __imf_frcp_ru(float);
float __imf_frcp_rd(float);

double __imf_dadd_rz(double, double);
double __imf_dadd_rn(double, double);
Expand All @@ -44,6 +48,10 @@ double __imf_ddiv_rz(double, double);
double __imf_ddiv_rn(double, double);
double __imf_ddiv_ru(double, double);
double __imf_ddiv_rd(double, double);
double __imf_drcp_rz(double);
double __imf_drcp_rn(double);
double __imf_drcp_ru(double);
double __imf_drcp_rd(double);
};

namespace sycl {
Expand Down Expand Up @@ -114,6 +122,14 @@ template <typename Tp = float> Tp fdiv_rz(Tp x, Tp y) {
return __imf_fdiv_rz(x, y);
}

template <typename Tp = float> Tp frcp_rd(Tp x) { return __imf_frcp_rd(x); }

template <typename Tp = float> Tp frcp_rn(Tp x) { return __imf_frcp_rn(x); }

template <typename Tp = float> Tp frcp_ru(Tp x) { return __imf_frcp_ru(x); }

template <typename Tp = float> Tp frcp_rz(Tp x) { return __imf_frcp_rz(x); }

template <typename Tp = double> Tp dadd_rd(Tp x, Tp y) {
return __imf_dadd_rd(x, y);
}
Expand Down Expand Up @@ -177,6 +193,14 @@ template <typename Tp = double> Tp ddiv_ru(Tp x, Tp y) {
template <typename Tp = double> Tp ddiv_rz(Tp x, Tp y) {
return __imf_ddiv_rz(x, y);
}

template <typename Tp = double> Tp drcp_rd(Tp x) { return __imf_drcp_rd(x); }

template <typename Tp = double> Tp drcp_rn(Tp x) { return __imf_drcp_rn(x); }

template <typename Tp = double> Tp drcp_ru(Tp x) { return __imf_drcp_ru(x); }

template <typename Tp = double> Tp drcp_rz(Tp x) { return __imf_drcp_rz(x); }
} // namespace ext::intel::math
} // namespace _V1
} // namespace sycl
30 changes: 30 additions & 0 deletions sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,35 @@ int main(int, char **) {
std::cout << "sycl::ext::intel::math::fdiv_rz passes." << std::endl;
}

{
std::initializer_list<float> input_vals = {
0x1.ba90e6p+1, 0x1.4p+1, 0x1.ea77e6p-2, 0x1.e8330ap+19,
-0x1.4ffd68p+5, -0x1.443084p-15, 0x1.605fb2p+6, -0x1.2eb718p-7};
std::initializer_list<unsigned> ref_vals_rd = {
0x3e9414f5, 0x3ecccccc, 0x40059e85, 0x35863d80,
0xbcc30db3, 0xc6ca2743, 0x3c39fbfb, 0xc2d87e72};
std::initializer_list<unsigned> ref_vals_rn = {
0x3e9414f5, 0x3ecccccd, 0x40059e85, 0x35863d80,
0xbcc30db2, 0xc6ca2743, 0x3c39fbfc, 0xc2d87e71};
std::initializer_list<unsigned> ref_vals_ru = {
0x3e9414f6, 0x3ecccccd, 0x40059e86, 0x35863d81,
0xbcc30db2, 0xc6ca2742, 0x3c39fbfc, 0xc2d87e71};
std::initializer_list<unsigned> ref_vals_rz = {
0x3e9414f5, 0x3ecccccc, 0x40059e85, 0x35863d80,
0xbcc30db2, 0xc6ca2742, 0x3c39fbfb, 0xc2d87e71};
test(device_queue, input_vals, ref_vals_rd,
FT(unsigned, sycl::ext::intel::math::frcp_rd));
std::cout << "sycl::ext::intel::math::frcp_rd passes." << std::endl;
test(device_queue, input_vals, ref_vals_rn,
FT(unsigned, sycl::ext::intel::math::frcp_rn));
std::cout << "sycl::ext::intel::math::frcp_rn passes." << std::endl;
test(device_queue, input_vals, ref_vals_ru,
FT(unsigned, sycl::ext::intel::math::frcp_ru));
std::cout << "sycl::ext::intel::math::frcp_ru passes." << std::endl;
test(device_queue, input_vals, ref_vals_rz,
FT(unsigned, sycl::ext::intel::math::frcp_rz));
std::cout << "sycl::ext::intel::math::frcp_rz passes." << std::endl;
}

return 0;
}
29 changes: 29 additions & 0 deletions sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,34 @@ int main(int, char **) {
std::cout << "sycl::ext::intel::math::ddiv_rz passes." << std::endl;
}

{
std::initializer_list<double> input_vals1 = {
0x1p+2, 0x1.fbd37afb0f8edp-1, 0x1.9238e38e38e35p+6, 0x1.7p+3};
std::initializer_list<unsigned long long> ref_vals_rd = {
0x3fd0000000000000, 0x3ff021aa6a60809b, 0x3f845de9ef97e71e,
0x3fb642c8590b2164};
std::initializer_list<unsigned long long> ref_vals_rn = {
0x3fd0000000000000, 0x3ff021aa6a60809c, 0x3f845de9ef97e71f,
0x3fb642c8590b2164};
std::initializer_list<unsigned long long> ref_vals_ru = {
0x3fd0000000000000, 0x3ff021aa6a60809c, 0x3f845de9ef97e71f,
0x3fb642c8590b2165};
std::initializer_list<unsigned long long> ref_vals_rz = {
0x3fd0000000000000, 0x3ff021aa6a60809b, 0x3f845de9ef97e71e,
0x3fb642c8590b2164};
test(device_queue, input_vals1, ref_vals_rd,
FT(unsigned long long, sycl::ext::intel::math::drcp_rd));
std::cout << "sycl::ext::intel::math::drcp_rd passes." << std::endl;
test(device_queue, input_vals1, ref_vals_rn,
FT(unsigned long long, sycl::ext::intel::math::drcp_rn));
std::cout << "sycl::ext::intel::math::drcp_rn passes." << std::endl;
test(device_queue, input_vals1, ref_vals_ru,
FT(unsigned long long, sycl::ext::intel::math::drcp_ru));
std::cout << "sycl::ext::intel::math::drcp_ru passes." << std::endl;
test(device_queue, input_vals1, ref_vals_rz,
FT(unsigned long long, sycl::ext::intel::math::drcp_rz));
std::cout << "sycl::ext::intel::math::drcp_rz passes." << std::endl;
}

return 0;
}

0 comments on commit 9a4719b

Please sign in to comment.