From af65855fa6b6df0eded078bd3dbe3bf4a6a2b2e3 Mon Sep 17 00:00:00 2001 From: fineg74 <61437305+fineg74@users.noreply.github.com> Date: Mon, 13 May 2024 23:49:48 -0700 Subject: [PATCH] [SYCL][ESIMD]Replace use of intrinsics with spirv functions (#13553) --- .../ext/intel/esimd/detail/math_intrin.hpp | 46 +++++++ sycl/include/sycl/ext/intel/esimd/math.hpp | 126 ++++++++++++++---- sycl/test/esimd/math_impl.cpp | 12 +- sycl/test/esimd/sycl_half_math_ops.cpp | 2 +- 4 files changed, 152 insertions(+), 34 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp index 59228e6e85a39..e5fc036f5275c 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp @@ -26,6 +26,52 @@ #define __ESIMD_cpp_vec_t(T, SZ) \ __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__cpp_t, SZ> +// The following spirv intrinsics declarations are put here to avoid unintended +// use by other targets where it causes run time failures due to the fact that +// they are implemented for INTEL GPU only. +template extern __DPCPP_SYCL_EXTERNAL T __spirv_ocl_native_exp2(T); +template +extern __DPCPP_SYCL_EXTERNAL __ESIMD_raw_vec_t(T, N) + __spirv_ocl_native_exp2(__ESIMD_raw_vec_t(T, N)); + +template +extern __DPCPP_SYCL_EXTERNAL T __spirv_ocl_native_recip(T); +template +extern __DPCPP_SYCL_EXTERNAL __ESIMD_raw_vec_t(T, N) + __spirv_ocl_native_recip(__ESIMD_raw_vec_t(T, N)); + +template extern __DPCPP_SYCL_EXTERNAL T __spirv_ocl_native_cos(T); +template +extern __DPCPP_SYCL_EXTERNAL __ESIMD_raw_vec_t(T, N) + __spirv_ocl_native_cos(__ESIMD_raw_vec_t(T, N)); + +template extern __DPCPP_SYCL_EXTERNAL T __spirv_ocl_native_log2(T); +template +extern __DPCPP_SYCL_EXTERNAL __ESIMD_raw_vec_t(T, N) + __spirv_ocl_native_log2(__ESIMD_raw_vec_t(T, N)); + +template +extern __DPCPP_SYCL_EXTERNAL T __spirv_ocl_native_rsqrt(T); +template +extern __DPCPP_SYCL_EXTERNAL __ESIMD_raw_vec_t(T, N) + __spirv_ocl_native_rsqrt(__ESIMD_raw_vec_t(T, N)); + +template extern __DPCPP_SYCL_EXTERNAL T __spirv_ocl_native_sin(T); +template +extern __DPCPP_SYCL_EXTERNAL __ESIMD_raw_vec_t(T, N) + __spirv_ocl_native_sin(__ESIMD_raw_vec_t(T, N)); + +template extern __DPCPP_SYCL_EXTERNAL T __spirv_ocl_native_sqrt(T); +template +extern __DPCPP_SYCL_EXTERNAL __ESIMD_raw_vec_t(T, N) + __spirv_ocl_native_sqrt(__ESIMD_raw_vec_t(T, N)); + +template +extern __DPCPP_SYCL_EXTERNAL T __spirv_ocl_native_powr(T, T); +template +__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) + __spirv_ocl_native_powr(__ESIMD_raw_vec_t(T, N), __ESIMD_raw_vec_t(T, N)); + // saturation intrinsics template __ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ) diff --git a/sycl/include/sycl/ext/intel/esimd/math.hpp b/sycl/include/sycl/ext/intel/esimd/math.hpp index d8d4764f5af68..bb46b4a34d534 100644 --- a/sycl/include/sycl/ext/intel/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/esimd/math.hpp @@ -102,11 +102,11 @@ __esimd_abs_common_internal(simd src0) { } template -ESIMD_NODEBUG - ESIMD_INLINE std::enable_if_t::value && - detail::is_esimd_scalar::value, - TRes> - __esimd_abs_common_internal(TArg src0) { + +__ESIMD_API std::enable_if_t::value && + detail::is_esimd_scalar::value, + TRes> +__esimd_abs_common_internal(TArg src0) { simd Src0 = src0; simd Result = __esimd_abs_common_internal(Src0); return convert(Result)[0]; @@ -342,67 +342,98 @@ std::enable_if_t::value, T>(min)(T src0, T src1, /// @addtogroup sycl_esimd_math_ext /// @{ +#if defined(__SYCL_DEVICE_ONLY__) +#define __ESIMD_VECTOR_IMPL(T, name, iname) \ + __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t, N> res = \ + __spirv_ocl_native_##iname<__ESIMD_DNS::__raw_t, N>(src.data()); \ + if constexpr (std::is_same_v) \ + return res; \ + else \ + return esimd::saturate(simd(res)); +#define __ESIMD_SCALAR_IMPL(T, name, iname) \ + __ESIMD_DNS::__raw_t res = \ + __spirv_ocl_native_##iname<__ESIMD_DNS::__raw_t>(src); \ + if constexpr (std::is_same_v) \ + return res; \ + else \ + return esimd::saturate(simd(res))[0]; +#else +#define __ESIMD_VECTOR_IMPL(T, name, iname) return 0; +#define __ESIMD_SCALAR_IMPL(T, name, iname) return 0; +#endif // __SYCL_DEVICE_ONLY__ + #define __ESIMD_UNARY_INTRINSIC_DEF(COND, name, iname) \ /** Vector version. */ \ template > \ __ESIMD_API simd name(simd src, Sat sat = {}) { \ - __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t, N> res = \ - __esimd_##iname(src.data()); \ - if constexpr (std::is_same_v) \ - return res; \ - else \ - return esimd::saturate(simd(res)); \ + __ESIMD_VECTOR_IMPL(T, name, iname) \ } \ \ /** Scalar version. */ \ template > \ __ESIMD_API T name(T src, Sat sat = {}) { \ - simd src_vec = src; \ - simd res = name(src_vec, sat); \ - return res[0]; \ + __ESIMD_SCALAR_IMPL(T, name, iname) \ } -#define __ESIMD_EMATH_COND \ - detail::is_generic_floating_point_v && (sizeof(T) <= 4) - #define __ESIMD_EMATH_IEEE_COND \ detail::is_generic_floating_point_v && (sizeof(T) >= 4) +#define __ESIMD_EMATH_SPIRV_COND \ + std::is_same_v || std::is_same_v + /// Inversion - calculates (1/x). Supports \c half and \c float. /// Precision: 1 ULP. -__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_COND, inv, inv) +__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_SPIRV_COND, inv, recip) /// Logarithm base 2. Supports \c half and \c float. /// Precision depending on argument range: /// - [0.5..2]: absolute error is 2^-21 or less /// - (0..0.5) or (2..+INF]: relative error is 2^-21 or less -__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_COND, log2, log) +__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_SPIRV_COND, log2, log2) /// Exponent base 2. Supports \c half and \c float. /// Precision: 4 ULP. -__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_COND, exp2, exp) +__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_SPIRV_COND, exp2, exp2) /// Square root. Is not IEEE754-compatible. Supports \c half, \c float and /// \c double. Precision: 4 ULP. __ESIMD_UNARY_INTRINSIC_DEF(detail::is_generic_floating_point_v, sqrt, sqrt) /// IEEE754-compliant square root. Supports \c float and \c double. -__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_IEEE_COND, sqrt_ieee, ieee_sqrt) +template > +__ESIMD_API simd sqrt_ieee(simd src, Sat sat = {}) { + __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t, N> res = + __esimd_ieee_sqrt(src.data()); + if constexpr (std::is_same_v) + return res; + else + return esimd::saturate(simd(res)); +} + +/** Scalar version. */ +template > +__ESIMD_API T sqrt_ieee(T src, Sat sat = {}) { + simd src_vec = src; + simd res = sqrt_ieee(src_vec, sat); + return res[0]; +} /// Square root reciprocal - calculates 1/sqrt(x). /// Supports \c half and \c float. /// Precision: 4 ULP. -__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_COND, rsqrt, rsqrt) +__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_SPIRV_COND, rsqrt, rsqrt) /// Sine. Supports \c half and \c float. /// Absolute error: \c 0.0008 or less for the range [-32767*pi, 32767*pi]. -__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_COND, sin, sin) +__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_SPIRV_COND, sin, sin) /// Cosine. Supports \c half and \c float. /// Absolute error: \c 0.0008 or less for the range [-32767*pi, 32767*pi]. -__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_COND, cos, cos) +__ESIMD_UNARY_INTRINSIC_DEF(__ESIMD_EMATH_SPIRV_COND, cos, cos) template __ESIMD_API std::enable_if_t, simd> @@ -424,6 +455,8 @@ rsqrt(T src, Sat sat = {}) { } #undef __ESIMD_UNARY_INTRINSIC_DEF +#undef __ESIMD_VECTOR_IMPL +#undef __ESIMD_SCALAR_IMPL #define __ESIMD_BINARY_INTRINSIC_DEF(COND, name, iname) \ /** (vector, vector) version. */ \ @@ -457,15 +490,54 @@ rsqrt(T src, Sat sat = {}) { /// Power - calculates \c src0 in power of \c src1. Note available in DG2, PVC. /// Supports \c half and \c float. -/// TODO document accuracy etc. -__ESIMD_BINARY_INTRINSIC_DEF(__ESIMD_EMATH_COND, pow, pow) +template > +__ESIMD_API simd pow(simd src0, simd src1, Sat sat = {}) { +#if defined(__SYCL_DEVICE_ONLY__) + using RawVecT = __ESIMD_DNS::vector_type_t<__ESIMD_DNS::__raw_t, N>; + RawVecT src1_raw_conv = detail::convert_vector(src1.data()); + RawVecT res_raw = __spirv_ocl_native_powr<__ESIMD_DNS::__raw_t, N>( + src0.data(), src1_raw_conv); + if constexpr (std::is_same_v) + return res_raw; + else + return esimd::saturate(simd(res_raw)); +#else + return 0; +#endif // __SYCL_DEVICE_ONLY__ +} + +/** (vector, scalar) version. */ +template > +__ESIMD_API simd pow(simd src0, U src1, Sat sat = {}) { + return pow(src0, simd(src1), sat); +} + +/** (scalar, scalar) version. */ +template > +__ESIMD_API T pow(T src0, U src1, Sat sat = {}) { +#if defined(__SYCL_DEVICE_ONLY__) + using ResT = __ESIMD_DNS::__raw_t; + ResT src1_raw_conv = detail::convert_scalar(src1); + ResT res_raw = + __spirv_ocl_native_powr<__ESIMD_DNS::__raw_t>(src0, src1_raw_conv); + if constexpr (std::is_same_v) + return res_raw; + else + return esimd::saturate(simd(res_raw))[0]; +#else + return 0; +#endif // __SYCL_DEVICE_ONLY__ +} /// IEEE754-compliant floating-point division. Supports \c float and \c double. __ESIMD_BINARY_INTRINSIC_DEF(__ESIMD_EMATH_IEEE_COND, div_ieee, ieee_div) #undef __ESIMD_BINARY_INTRINSIC_DEF -#undef __ESIMD_EMATH_COND #undef __ESIMD_EMATH_IEEE_COND +#undef __ESIMD_EMATH_SPIRV_COND /// @} sycl_esimd_math_ext diff --git a/sycl/test/esimd/math_impl.cpp b/sycl/test/esimd/math_impl.cpp index 291cdf0aee82d..c3588264a33d3 100644 --- a/sycl/test/esimd/math_impl.cpp +++ b/sycl/test/esimd/math_impl.cpp @@ -31,13 +31,13 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd sycl_math(simd x) { SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd esimd_math(simd x) { simd v = 0; - //CHECK: call spir_func noundef <16 x float> @_Z11__esimd_cos + //CHECK: call spir_func noundef <16 x float> @_Z22__spirv_ocl_native_cos{{[^\(]*}} v = esimd::cos(x); - //CHECK: call spir_func noundef <16 x float> @_Z11__esimd_sin + //CHECK: call spir_func noundef <16 x float> @_Z22__spirv_ocl_native_sin{{[^\(]*}} v = esimd::sin(v); - //CHECK: call spir_func noundef <16 x float> @_Z11__esimd_log + //CHECK: call spir_func noundef <16 x float> @_Z23__spirv_ocl_native_log2{{[^\(]*}} v = esimd::log2(v); - //CHECK: call spir_func noundef <16 x float> @_Z11__esimd_exp + //CHECK: call spir_func noundef <16 x float> @_Z23__spirv_ocl_native_exp2{{[^\(]*}} v = esimd::exp2(v); return v; } @@ -47,9 +47,9 @@ esimd_math(simd x) { SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd esimd_math_emu(simd x) { simd v = 0; - //CHECK: call spir_func noundef <16 x float> @_Z11__esimd_log + //CHECK: call spir_func noundef <16 x float> @_Z23__spirv_ocl_native_log2{{[^\(]*}} v = esimd::log(x); - //CHECK: call spir_func noundef <16 x float> @_Z11__esimd_exp + //CHECK: call spir_func noundef <16 x float> @_Z23__spirv_ocl_native_exp2{{[^\(]*}} v = esimd::exp(v); return v; } diff --git a/sycl/test/esimd/sycl_half_math_ops.cpp b/sycl/test/esimd/sycl_half_math_ops.cpp index 6bfa67b85f741..38e645c703ebc 100644 --- a/sycl/test/esimd/sycl_half_math_ops.cpp +++ b/sycl/test/esimd/sycl_half_math_ops.cpp @@ -17,7 +17,7 @@ SYCL_EXTERNAL auto test_ext_math_op(simd val) SYCL_ESIMD_FUNCTION // CHECK: define dso_local spir_func <8 x half> @_Z16test_ext_math_op{{[^\(]*}}( // CHECK: <8 x half> %[[VAL_VEC:[a-zA-Z0-9_\.]+]]){{.*}} { return esimd::cos(val); -// CHECK: %[[RES:[a-zA-Z0-9_\.]+]] = call <8 x half> @llvm.genx.cos.v8f16(<8 x half> %[[VAL_VEC]]) +// CHECK: %[[RES:[a-zA-Z0-9_\.]+]] = call spir_func noundef <8 x half> @_Z22__spirv_ocl_native_cos{{[^\(]*}}(<8 x half> noundef %[[VAL_VEC]]) // CHECK-NEXT: ret <8 x half> %[[RES]] // CHECK-LABEL: } }