From 2b2a3d749a8eca7735f585fb07bb22b22bf0e31e Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 29 Mar 2024 09:58:36 -0700 Subject: [PATCH 01/79] Made preview-breaking-changes default --- .../include/sycl/detail/builtins/builtins.hpp | 245 +++++++ .../sycl/detail/generic_type_traits.hpp | 8 +- sycl/include/sycl/detail/vector_traits.hpp | 7 - sycl/include/sycl/half_type.hpp | 25 - sycl/include/sycl/types.hpp | 603 +----------------- .../Basic/sycl_2020_images/common.hpp | 2 +- .../array/read_write_unsampled_array.cpp | 4 +- .../bindless_images/bindless_helpers.hpp | 4 +- .../bindless_images/image_get_info.cpp | 4 +- .../bindless_images/mipmap/mipmap_read_1D.cpp | 4 +- .../bindless_images/mipmap/mipmap_read_2D.cpp | 4 +- .../bindless_images/mipmap/mipmap_read_3D.cpp | 4 +- sycl/test-e2e/bindless_images/read_1D.cpp | 4 +- sycl/test-e2e/bindless_images/read_2D.cpp | 4 +- .../bindless_images/read_2D_dynamic.cpp | 4 +- sycl/test-e2e/bindless_images/read_3D.cpp | 4 +- .../bindless_images/read_norm_types.cpp | 4 +- .../test-e2e/bindless_images/read_sampled.cpp | 4 +- .../bindless_images/read_write_1D.cpp | 4 +- .../read_write_1D_subregion.cpp | 4 +- .../bindless_images/read_write_2D.cpp | 4 +- .../read_write_2D_subregion.cpp | 4 +- .../bindless_images/read_write_3D.cpp | 4 +- .../read_write_3D_subregion.cpp | 4 +- .../bindless_images/read_write_unsampled.cpp | 4 +- sycl/test-e2e/bindless_images/sampling_2D.cpp | 5 +- .../sampling_2D_USM_shared.cpp | 5 +- .../bindless_images/sampling_2D_half.cpp | 5 +- sycl/test-e2e/bindless_images/sampling_3D.cpp | 4 +- .../sampling_unique_addr_modes.cpp | 4 +- .../user_types/mipmap_read_user_type_2D.cpp | 4 +- .../user_types/read_write_user_type.cpp | 4 +- .../user_types/user_types_common.hpp | 4 +- sycl/test/basic_tests/types.cpp | 19 - sycl/test/conf.txt | 1 + .../annotated_ptr/annotated_ptr.cpp | 4 - sycl/test_bfloat.cpp | 27 + 37 files changed, 369 insertions(+), 683 deletions(-) create mode 100644 sycl/include/sycl/detail/builtins/builtins.hpp create mode 100644 sycl/test/conf.txt create mode 100644 sycl/test_bfloat.cpp diff --git a/sycl/include/sycl/detail/builtins/builtins.hpp b/sycl/include/sycl/detail/builtins/builtins.hpp new file mode 100644 index 0000000000000..6ba7b50f05226 --- /dev/null +++ b/sycl/include/sycl/detail/builtins/builtins.hpp @@ -0,0 +1,245 @@ +//==------------------- builtins.hpp ---------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Implement SYCL builtin functions. This implementation is mainly driven by the +// requirement of not including anywhere in the SYCL headers (i.e. from +// within ), because it pollutes global namespace. Note that we +// can avoid that using MSVC's STL as the pollution happens even from +// / and other headers that have to be included per the SYCL +// specification. As such, an alternative approach might be to use math +// intrinsics with GCC/clang-based compilers and use when using MSVC as +// a host compiler. That hasn't been tried/investigated. +// +// Current implementation splits builtins into several files following the SYCL +// 2020 (revision 8) split into common/math/geometric/relational/etc. functions. +// For each set, the implementation is split into a user-visible +// include/sycl/detail/builtins/*_functions.hpp providing full device-side +// implementation as well as defining user-visible APIs and defining ABI +// implemented under source/builtins/*_functions.cpp for the host side. We +// provide both scalar/vector overloads through symbols in the SYCL runtime +// library due to the limitation above (for scalars) and due to +// performance reasons for vector overloads (to be able to benefit from +// vectorization). +// +// Providing declaration for the host side symbols contained in the library +// comes with its own challenges. One is compilation time - blindly providing +// all those declarations takes significant time (about 10% slowdown for +// "clang++ -fsycl" when compiling just "#include "). Another +// issue is that return type for templates is part of the mangling (and as such +// SFINAE requirements too). To overcome that we structure host side +// implementation roughly like this (in most cases): +// +// math_function.cpp exports: +// float sycl::__sin_impl(float); +// float1 sycl::__sin_impl(float1); +// float2 sycl::__sin_impl(float2); +// ... +// /* same for other types */ +// +// math_functions.hpp provide an implementation based on the following idea (in +// ::sycl namespace): +// float sin(float x) { +// extern __sin_impl(float); +// return __sin_impl(x); +// } +// template +// enable_if_valid_type sin(T x) { +// if constexpr (marray_or_swizzle) { +// ... +// call sycl::sin(vector_or_scalar) +// } else { +// extern T __sin_impl(T); +// return __sin_impl(x); +// } +// } +// That way we avoid having the full set of explicit declaration for the symbols +// in the library and instead only pay with compile time when those template +// instantiations actually happen. + +#pragma once + +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { +template +inline constexpr bool builtin_same_shape_v = + ((... && is_scalar_arithmetic_v) || (... && is_marray_v) || + (... && is_vec_or_swizzle_v)) && + (... && (num_elements::value == + num_elements::type>::value)); + +template +inline constexpr bool builtin_same_or_swizzle_v = + // Use builtin_same_shape_v to filter out types unrelated to builtins. + builtin_same_shape_v && all_same_v...>; + +namespace builtins { +#ifdef __SYCL_DEVICE_ONLY__ +template auto convert_arg(T &&x) { + using no_cv_ref = std::remove_cv_t>; + if constexpr (is_vec_v) { + using elem_type = get_elem_type_t; + using converted_elem_type = + decltype(convert_arg(std::declval())); + + constexpr auto N = no_cv_ref::size(); + using result_type = std::conditional_t; + // TODO: We should have this bit_cast impl inside vec::convert. + return bit_cast(static_cast(x)); + } else if constexpr (is_swizzle_v) { + return convert_arg(simplify_if_swizzle_t{x}); + } else { + static_assert(is_scalar_arithmetic_v || + is_multi_ptr_v || std::is_pointer_v || + std::is_same_v); + return convertToOpenCLType(std::forward(x)); + } +} + +template auto convert_result(T &&x) { + if constexpr (is_vec_v) { + return bit_cast(x); + } else { + return std::forward(x); + } +} +#endif +} // namespace builtins + +template +auto builtin_marray_impl(FuncTy F, const Ts &...x) { + using ret_elem_type = decltype(F(x[0]...)); + using T = typename first_type::type; + marray Res; + constexpr auto N = T::size(); + for (size_t I = 0; I < N / 2; ++I) { + auto PartialRes = [&]() { + using elem_ty = get_elem_type_t; + if constexpr (std::is_integral_v) + return F(to_vec2(x, I * 2) + .template as, 2>>()...); + else + return F(to_vec2(x, I * 2)...); + }(); + std::memcpy(&Res[I * 2], &PartialRes, sizeof(decltype(PartialRes))); + } + if (N % 2) + Res[N - 1] = F(x[N - 1]...); + return Res; +} + +template +auto builtin_default_host_impl(FuncTy F, const Ts &...x) { + // We implement support for marray/swizzle in the headers and export symbols + // for scalars/vector from the library binary. The reason is that scalar + // implementations mostly depend on which pollutes global namespace, + // so we can't unconditionally include it from the SYCL headers. Vector + // overloads have to be implemented in the library next to scalar overloads in + // order to be vectorizable. + if constexpr ((... || is_marray_v)) { + return builtin_marray_impl(F, x...); + } else { + return F(simplify_if_swizzle_t{x}...); + } +} + +template +auto builtin_delegate_to_scalar(FuncTy F, const Ts &...x) { + using T = typename first_type::type; + static_assert(is_vec_or_swizzle_v || is_marray_v); + + constexpr auto Size = T::size(); + using ret_elem_type = decltype(F(x[0]...)); + std::conditional_t, marray, + vec> + r{}; + + if constexpr (is_marray_v) { + for (size_t i = 0; i < Size; ++i) + r[i] = F(x[i]...); + } else { + loop([&](auto idx) { r[idx] = F(x[idx]...); }); + } + + return r; +} + +template +struct fp_elem_type + : std::bool_constant< + check_type_in_v, float, double, half>> {}; +template +struct float_elem_type + : std::bool_constant, float>> {}; + +template +struct same_basic_shape : std::bool_constant> {}; + +template +struct same_elem_type : std::bool_constant::value && + all_same_v...>> { +}; + +template struct any_shape : std::true_type {}; + +template +struct scalar_only : std::bool_constant> {}; + +template +struct non_scalar_only : std::bool_constant> {}; + +template struct default_ret_type { + using type = T; +}; + +template struct scalar_ret_type { + using type = get_elem_type_t; +}; + +template