From bdb9536eca582a8530bdcb2d11f986d5d63093f5 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 6 Feb 2024 15:32:27 -0800 Subject: [PATCH] [SYCL] Split sycl::clamp implementation in two SYCL 2020, revision 8 defines two separate clamp functions - one in integer functions and one in common functions. Follow the same split in the implementation so that clamp's handling is uniform with other common/integer functions. --- .../sycl/detail/builtins/common_functions.inc | 25 +++---------------- .../detail/builtins/integer_functions.inc | 9 +++++++ sycl/source/builtins/common_functions.cpp | 12 ++------- sycl/source/builtins/integer_functions.cpp | 4 +++ 4 files changed, 19 insertions(+), 31 deletions(-) diff --git a/sycl/include/sycl/detail/builtins/common_functions.inc b/sycl/include/sycl/detail/builtins/common_functions.inc index fb10964934cf0..022cab78e51db 100644 --- a/sycl/include/sycl/detail/builtins/common_functions.inc +++ b/sycl/include/sycl/detail/builtins/common_functions.inc @@ -72,32 +72,15 @@ min(T x, detail::get_elem_type_t y) { detail::simplify_if_swizzle_t{y}); } -#undef BUILTIN_COMMON - -#ifdef __SYCL_DEVICE_ONLY__ -DEVICE_IMPL_TEMPLATE(THREE_ARGS, clamp, builtin_enable_generic_t, - [](auto... 
xs) { - using ElemTy = detail::get_elem_type_t; - if constexpr (std::is_integral_v) { - if constexpr (std::is_signed_v) { - return __spirv_ocl_s_clamp(xs...); - } else { - return __spirv_ocl_u_clamp(xs...); - } - } else { - return __spirv_ocl_fclamp(xs...); - } - }) -#else -HOST_IMPL_TEMPLATE(THREE_ARGS, clamp, builtin_enable_generic_t, common, - default_ret_type) -#endif +BUILTIN_COMMON(THREE_ARGS, clamp, __spirv_ocl_fclamp) template -detail::builtin_enable_generic_non_scalar_t +detail::builtin_enable_common_non_scalar_t clamp(T x, detail::get_elem_type_t y, detail::get_elem_type_t z) { return clamp(detail::simplify_if_swizzle_t{x}, detail::simplify_if_swizzle_t{y}, detail::simplify_if_swizzle_t{z}); } + +#undef BUILTIN_COMMON } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/detail/builtins/integer_functions.inc b/sycl/include/sycl/detail/builtins/integer_functions.inc index dfeb815e52494..455abf07aa250 100644 --- a/sycl/include/sycl/detail/builtins/integer_functions.inc +++ b/sycl/include/sycl/detail/builtins/integer_functions.inc @@ -122,6 +122,15 @@ min(T x, detail::get_elem_type_t y) { detail::simplify_if_swizzle_t{y}); } +BUILTIN_GENINT_SU(THREE_ARGS, clamp) +template +detail::builtin_enable_integer_non_scalar_t +clamp(T x, detail::get_elem_type_t y, detail::get_elem_type_t z) { + return clamp(detail::simplify_if_swizzle_t{x}, + detail::simplify_if_swizzle_t{y}, + detail::simplify_if_swizzle_t{z}); +} + BUILTIN_GENINT(ONE_ARG, clz) BUILTIN_GENINT(ONE_ARG, ctz) BUILTIN_GENINT(ONE_ARG, popcount) diff --git a/sycl/source/builtins/common_functions.cpp b/sycl/source/builtins/common_functions.cpp index 09742649ca24d..30d0645f2b517 100644 --- a/sycl/source/builtins/common_functions.cpp +++ b/sycl/source/builtins/common_functions.cpp @@ -63,16 +63,8 @@ BUILTIN_COMMON(TWO_ARGS, max, BUILTIN_COMMON(TWO_ARGS, min, [](auto x, auto y) -> decltype(x) { return (y < x ? 
y : x); }) -// clamp is implemented for INTEGER_TYPES as well, so expand/inline -// BUILTIN_COMMON manually. -HOST_IMPL(clamp, [](auto x, auto y, auto z) -> decltype(x) { - using ElemTy = detail::get_elem_type_t; - if constexpr (std::is_integral_v) { - return std::min(std::max(x, y), z); - } else { - return std::fmin(std::fmax(x, y), z); - } +BUILTIN_COMMON(THREE_ARGS, clamp, [](auto x, auto y, auto z) -> decltype(x) { + return std::fmin(std::fmax(x, y), z); }) -EXPORT_SCALAR_AND_VEC_1_16(THREE_ARGS, clamp, INTEGER_TYPES, FP_TYPES) } // namespace _V1 } // namespace sycl diff --git a/sycl/source/builtins/integer_functions.cpp b/sycl/source/builtins/integer_functions.cpp index 26c4dd9a5788f..381d6f1fa0a10 100644 --- a/sycl/source/builtins/integer_functions.cpp +++ b/sycl/source/builtins/integer_functions.cpp @@ -214,6 +214,10 @@ BUILTIN_GENINT_SU(TWO_ARGS, max, BUILTIN_GENINT_SU(TWO_ARGS, min, [](auto x, auto y) -> decltype(x) { return y < x ? y : x; }) +BUILTIN_GENINT_SU(THREE_ARGS, clamp, [](auto x, auto y, auto z) -> decltype(x) { + return std::min(std::max(x, y), z); +}) + template static inline constexpr T __clz_impl(T x, T m, T n = 0) { return (x & m) ? n : __clz_impl(x, T(m >> 1), ++n); }