diff --git a/sycl/include/sycl/builtins_utils_vec.hpp b/sycl/include/sycl/builtins_utils_vec.hpp index eeaff9450b031..955a9875105ad 100644 --- a/sycl/include/sycl/builtins_utils_vec.hpp +++ b/sycl/include/sycl/builtins_utils_vec.hpp @@ -25,14 +25,11 @@ struct is_valid_elem_type, Ts...> template struct is_valid_elem_type, Ts...> : std::bool_constant> {}; -template class OperationCurrentT, int... Indexes, +template -struct is_valid_elem_type, +struct is_valid_elem_type, Ts...> - : std::bool_constant> { -}; + : std::bool_constant> {}; template struct is_valid_elem_type, Ts...> @@ -48,10 +45,8 @@ template struct num_elements> : std::integral_constant {}; template struct num_elements> : std::integral_constant {}; -template class OperationCurrentT, int... Indexes> -struct num_elements> +template +struct num_elements> : std::integral_constant {}; // Utilty trait for checking that the number of elements in T is in Ns. @@ -64,11 +59,9 @@ constexpr bool is_valid_size_v = is_valid_size::value; // Utility for converting a swizzle to a vector or preserve the type if it isn't // a swizzle. -template class OperationCurrentT, int... Indexes> -struct simplify_if_swizzle> { - using type = vec; +template +struct simplify_if_swizzle> { + using type = vec; }; template @@ -83,14 +76,11 @@ template struct same_size_signed_int> { template struct same_size_signed_int> { using type = vec::type, N>; }; -template class OperationCurrentT, int... Indexes> -struct same_size_signed_int> { +template +struct same_size_signed_int> { // Converts to vec for simplicity. using type = - vec::type, - sizeof...(Indexes)>; + vec::type, sizeof...(Indexes)>; }; template struct same_size_unsigned_int> { @@ -99,14 +89,11 @@ template struct same_size_unsigned_int> { template struct same_size_unsigned_int> { using type = vec::type, N>; }; -template class OperationCurrentT, int... Indexes> -struct same_size_unsigned_int> { +template +struct same_size_unsigned_int> { // Converts to vec for simplicity. using type = - vec::type, - sizeof...(Indexes)>; + vec::type, sizeof...(Indexes)>; }; // Utility trait for changing the element type of a type T. If T is a scalar, @@ -122,16 +109,11 @@ template struct change_elements> { using type = vec::type, N>; }; -template class OperationCurrentT, - int... Indexes> -struct change_elements> { +template +struct change_elements> { // Converts to vec for simplicity. using type = - vec::type, - sizeof...(Indexes)>; + vec::type, sizeof...(Indexes)>; }; template diff --git a/sycl/include/sycl/detail/builtins/helper_macros.hpp b/sycl/include/sycl/detail/builtins/helper_macros.hpp index 6469f35da3e6c..2234e1e1ca217 100644 --- a/sycl/include/sycl/detail/builtins/helper_macros.hpp +++ b/sycl/include/sycl/detail/builtins/helper_macros.hpp @@ -192,12 +192,14 @@ template \ detail::ENABLER(NAME)( \ NUM_ARGS##_TEMPLATE_TYPE_ARG) { \ + /* vec(vector_t) is explicit, cannot rely on implicit conversion: */ \ + using ret_type = detail::ENABLER; \ if constexpr (detail::is_marray_v) { \ - return detail::DELEGATOR( \ + return ret_type{detail::DELEGATOR( \ [](NUM_ARGS##_AUTO_ARG) { return (NS::NAME)(NUM_ARGS##_ARG); }, \ - NUM_ARGS##_ARG); \ + NUM_ARGS##_ARG)}; \ } else { \ - return __VA_ARGS__(NUM_ARGS##_CONVERTED_ARG); \ + return ret_type{__VA_ARGS__(NUM_ARGS##_CONVERTED_ARG)}; \ } \ } diff --git a/sycl/include/sycl/detail/builtins/math_functions.inc b/sycl/include/sycl/detail/builtins/math_functions.inc index 8a5ff1b1e47ab..106d706ed70a0 100644 --- a/sycl/include/sycl/detail/builtins/math_functions.inc +++ b/sycl/include/sycl/detail/builtins/math_functions.inc @@ -254,7 +254,10 @@ auto builtin_delegate_ptr_impl(FuncTy F, PtrTy p, Ts... xs) { detail::NON_SCALAR_ENABLER \ NAME(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE_ARG), PtrTy p) { \ - return detail::NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _ARG), p); \ + using ret_ty = detail::NON_SCALAR_ENABLER< \ + SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE), PtrTy>; \ + return ret_ty{ \ + detail::NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _ARG), p)}; \ } #if __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/detail/image_accessor_util.hpp b/sycl/include/sycl/detail/image_accessor_util.hpp index 49dacd704a748..db7adee9e3932 100644 --- a/sycl/include/sycl/detail/image_accessor_util.hpp +++ b/sycl/include/sycl/detail/image_accessor_util.hpp @@ -356,7 +356,7 @@ void convertReadData(const vec PixelData, // Assuming: (float)c / 31.0f; c represents the 5-bit integer. // (float)c / 63.0f; c represents the 6-bit integer. // PixelData.x will be of type std::uint16_t. - ushort4 Temp(PixelData.x()); + ushort4 Temp(PixelData[0]); ushort4 MaskBits(0xF800 /*r:bits 11-15*/, 0x07E0 /*g:bits 5-10*/, 0x001F /*b:bits 0-4*/, 0x0000); ushort4 ShiftBits(11, 5, 0, 0); @@ -372,7 +372,7 @@ void convertReadData(const vec PixelData, // Extracting each 5-bit channel data. // PixelData.x will be of type std::uint16_t. - ushort4 Temp(PixelData.x()); + ushort4 Temp(PixelData[0]); ushort4 MaskBits(0x7C00 /*r:bits 10-14*/, 0x03E0 /*g:bits 5-9*/, 0x001F /*b:bits 0-4*/, 0x0000); ushort4 ShiftBits(10, 5, 0, 0); @@ -383,7 +383,7 @@ void convertReadData(const vec PixelData, case image_channel_type::unorm_int_101010: { // Extracting each 10-bit channel data. // PixelData.x will be of type std::uint32_t. - uint4 Temp(PixelData.x()); + uint4 Temp(PixelData[0]); uint4 MaskBits(0x3FF00000 /*r:bits 20-29*/, 0x000FFC00 /*g:bits 10-19*/, 0x000003FF /*b:bits 0-9*/, 0x00000000); uint4 ShiftBits(20, 10, 0, 0); diff --git a/sycl/include/sycl/detail/named_swizzles_mixin.hpp b/sycl/include/sycl/detail/named_swizzles_mixin.hpp new file mode 100644 index 0000000000000..c7bee0d1b7bb1 --- /dev/null +++ b/sycl/include/sycl/detail/named_swizzles_mixin.hpp @@ -0,0 +1,823 @@ +//==---------------- named_swizzles_mixin.hpp ------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +// This files implements two mixins +// `NamedSwizzlesMixinConst`/`NamedSwizzlesMixinBoth` that abstract away named +// swizzles implementation for SYCL vector and swizzles classes + +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +#ifndef SYCL_SIMPLE_SWIZZLES +#define __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES +#else +// TODO: It might be beneficial to use partial specializations for different Ns, +// instead of making all the named swizzles templates with SFINAE conditions. +#define __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES \ + /* __swizzled_vec__ XYZW_SWIZZLE() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N <= 4, xx, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xy, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xz, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xw, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yx, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yy, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yz, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yw, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zx, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zy, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zz, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zw, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wx, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wy, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wz, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ww, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N <= 4, xxx, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xxy, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxz, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxw, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyx, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyy, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyz, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyw, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzx, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzy, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzz, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzw, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwx, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwy, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwz, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xww, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxx, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxy, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxz, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxw, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyx, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyy, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyz, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyw, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzx, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzy, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzz, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzw, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywx, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywy, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywz, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yww, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxx, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxy, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxz, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxw, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyx, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyy, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyz, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyw, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzx, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzy, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzz, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzw, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwx, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwy, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwz, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zww, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxx, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxy, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxz, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxw, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyx, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyy, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyz, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyw, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzx, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzy, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzz, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzw, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwx, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwy, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwz, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, www, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N <= 4, xxxx, 0, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xxxy, 0, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxxz, 0, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxxw, 0, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xxyx, 0, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xxyy, 0, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxyz, 0, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxyw, 0, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxzx, 0, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxzy, 0, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxzz, 0, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxzw, 0, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxwx, 0, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxwy, 0, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxwz, 0, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxww, 0, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyxx, 0, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyxy, 0, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyxz, 0, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyxw, 0, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyyx, 0, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyyy, 0, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyyz, 0, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyyw, 0, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyzx, 0, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyzy, 0, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyzz, 0, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyzw, 0, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xywx, 0, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xywy, 0, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xywz, 0, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyww, 0, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzxx, 0, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzxy, 0, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzxz, 0, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzxw, 0, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzyx, 0, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzyy, 0, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzyz, 0, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzyw, 0, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzzx, 0, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzzy, 0, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzzz, 0, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzzw, 0, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzwx, 0, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzwy, 0, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzwz, 0, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzww, 0, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwxx, 0, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwxy, 0, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwxz, 0, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwxw, 0, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwyx, 0, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwyy, 0, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwyz, 0, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwyw, 0, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwzx, 0, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwzy, 0, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwzz, 0, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwzw, 0, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwwx, 0, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwwy, 0, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwwz, 0, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwww, 0, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxxx, 1, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxxy, 1, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxxz, 1, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxxw, 1, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxyx, 1, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxyy, 1, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxyz, 1, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxyw, 1, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxzx, 1, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxzy, 1, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxzz, 1, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxzw, 1, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxwx, 1, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxwy, 1, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxwz, 1, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxww, 1, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyxx, 1, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyxy, 1, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyxz, 1, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyxw, 1, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyyx, 1, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyyy, 1, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyyz, 1, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyyw, 1, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyzx, 1, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyzy, 1, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyzz, 1, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyzw, 1, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yywx, 1, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yywy, 1, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yywz, 1, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyww, 1, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzxx, 1, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzxy, 1, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzxz, 1, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzxw, 1, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzyx, 1, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzyy, 1, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzyz, 1, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzyw, 1, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzzx, 1, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzzy, 1, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzzz, 1, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzzw, 1, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzwx, 1, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzwy, 1, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzwz, 1, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzww, 1, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywxx, 1, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywxy, 1, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywxz, 1, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywxw, 1, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywyx, 1, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywyy, 1, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywyz, 1, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywyw, 1, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywzx, 1, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywzy, 1, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywzz, 1, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywzw, 1, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywwx, 1, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywwy, 1, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywwz, 1, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywww, 1, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxxx, 2, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxxy, 2, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxxz, 2, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxxw, 2, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxyx, 2, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxyy, 2, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxyz, 2, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxyw, 2, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxzx, 2, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxzy, 2, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxzz, 2, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxzw, 2, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxwx, 2, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxwy, 2, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxwz, 2, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxww, 2, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyxx, 2, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyxy, 2, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyxz, 2, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyxw, 2, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyyx, 2, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyyy, 2, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyyz, 2, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyyw, 2, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyzx, 2, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyzy, 2, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyzz, 2, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyzw, 2, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zywx, 2, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zywy, 2, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zywz, 2, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyww, 2, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzxx, 2, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzxy, 2, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzxz, 2, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzxw, 2, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzyx, 2, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzyy, 2, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzyz, 2, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzyw, 2, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzzx, 2, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzzy, 2, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzzz, 2, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzzw, 2, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzwx, 2, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzwy, 2, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzwz, 2, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzww, 2, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwxx, 2, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwxy, 2, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwxz, 2, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwxw, 2, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwyx, 2, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwyy, 2, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwyz, 2, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwyw, 2, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwzx, 2, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwzy, 2, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwzz, 2, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwzw, 2, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwwx, 2, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwwy, 2, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwwz, 2, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwww, 2, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxxx, 3, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxxy, 3, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxxz, 3, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxxw, 3, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxyx, 3, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxyy, 3, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxyz, 3, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxyw, 3, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxzx, 3, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxzy, 3, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxzz, 3, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxzw, 3, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxwx, 3, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxwy, 3, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxwz, 3, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxww, 3, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyxx, 3, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyxy, 3, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyxz, 3, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyxw, 3, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyyx, 3, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyyy, 3, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyyz, 3, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyyw, 3, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyzx, 3, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyzy, 3, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyzz, 3, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyzw, 3, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wywx, 3, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wywy, 3, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wywz, 3, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyww, 3, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzxx, 3, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzxy, 3, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzxz, 3, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzxw, 3, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzyx, 3, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzyy, 3, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzyz, 3, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzyw, 3, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzzx, 3, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzzy, 3, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzzz, 3, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzzw, 3, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzwx, 3, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzwy, 3, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzwz, 3, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzww, 3, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwxx, 3, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwxy, 3, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwxz, 3, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwxw, 3, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwyx, 3, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwyy, 3, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwyz, 3, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwyw, 3, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwzx, 3, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwzy, 3, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwzz, 3, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwzw, 3, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwwx, 3, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwwy, 3, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwwz, 3, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwww, 3, 3, 3, 3) \ + \ + /* __swizzled_vec__ RGBA_SWIZZLE() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rr, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rg, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rb, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ra, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gr, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gg, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gb, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ga, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, br, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bg, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bb, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ba, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ar, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ag, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ab, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aa, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrr, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrg, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrb, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rra, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgr, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgg, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgb, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rga, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbr, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbg, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbb, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rba, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rar, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rag, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rab, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raa, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grr, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grg, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grb, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gra, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggr, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggg, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggb, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gga, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbr, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbg, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbb, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gba, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gar, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gag, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gab, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaa, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brr, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brg, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brb, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bra, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgr, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgg, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgb, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bga, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbr, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbg, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbb, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bba, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bar, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bag, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bab, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baa, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arr, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arg, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arb, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ara, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agr, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agg, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agb, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aga, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abr, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abg, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abb, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aba, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aar, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aag, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aab, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaa, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrrr, 0, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrrg, 0, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrrb, 0, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrra, 0, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrgr, 0, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrgg, 0, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrgb, 0, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrga, 0, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrbr, 0, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrbg, 0, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrbb, 0, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrba, 0, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrar, 0, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrag, 0, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrab, 0, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rraa, 0, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgrr, 0, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgrg, 0, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgrb, 0, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgra, 0, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rggr, 0, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rggg, 0, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rggb, 0, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgga, 0, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgbr, 0, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgbg, 0, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgbb, 0, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgba, 0, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgar, 0, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgag, 0, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgab, 0, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgaa, 0, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbrr, 0, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbrg, 0, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbrb, 0, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbra, 0, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbgr, 0, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbgg, 0, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbgb, 0, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbga, 0, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbbr, 0, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbbg, 0, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbbb, 0, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbba, 0, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbar, 0, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbag, 0, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbab, 0, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbaa, 0, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rarr, 0, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rarg, 0, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rarb, 0, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rara, 0, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ragr, 0, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ragg, 0, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ragb, 0, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raga, 0, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rabr, 0, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rabg, 0, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rabb, 0, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raba, 0, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raar, 0, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raag, 0, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raab, 0, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raaa, 0, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grrr, 1, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grrg, 1, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grrb, 1, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grra, 1, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grgr, 1, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grgg, 1, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grgb, 1, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grga, 1, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grbr, 1, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grbg, 1, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grbb, 1, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grba, 1, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grar, 1, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grag, 1, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grab, 1, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, graa, 1, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggrr, 1, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggrg, 1, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggrb, 1, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggra, 1, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gggr, 1, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gggg, 1, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gggb, 1, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggga, 1, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggbr, 1, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggbg, 1, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggbb, 1, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggba, 1, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggar, 1, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggag, 1, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggab, 1, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggaa, 1, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbrr, 1, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbrg, 1, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbrb, 1, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbra, 1, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbgr, 1, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbgg, 1, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbgb, 1, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbga, 1, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbbr, 1, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbbg, 1, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbbb, 1, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbba, 1, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbar, 1, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbag, 1, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbab, 1, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbaa, 1, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, garr, 1, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, garg, 1, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, garb, 1, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gara, 1, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gagr, 1, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gagg, 1, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gagb, 1, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaga, 1, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gabr, 1, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gabg, 1, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gabb, 1, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaba, 1, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaar, 1, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaag, 1, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaab, 1, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaaa, 1, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brrr, 2, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brrg, 2, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brrb, 2, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brra, 2, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brgr, 2, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brgg, 2, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brgb, 2, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brga, 2, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brbr, 2, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brbg, 2, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brbb, 2, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brba, 2, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brar, 2, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brag, 2, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brab, 2, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, braa, 2, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgrr, 2, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgrg, 2, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgrb, 2, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgra, 2, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bggr, 2, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bggg, 2, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bggb, 2, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgga, 2, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgbr, 2, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgbg, 2, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgbb, 2, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgba, 2, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgar, 2, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgag, 2, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgab, 2, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgaa, 2, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbrr, 2, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbrg, 2, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbrb, 2, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbra, 2, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbgr, 2, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbgg, 2, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbgb, 2, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbga, 2, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbbr, 2, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbbg, 2, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbbb, 2, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbba, 2, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbar, 2, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbag, 2, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbab, 2, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbaa, 2, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, barr, 2, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, barg, 2, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, barb, 2, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bara, 2, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bagr, 2, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bagg, 2, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bagb, 2, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baga, 2, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, babr, 2, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, babg, 2, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, babb, 2, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baba, 2, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baar, 2, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baag, 2, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baab, 2, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baaa, 2, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arrr, 3, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arrg, 3, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arrb, 3, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arra, 3, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, argr, 3, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, argg, 3, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, argb, 3, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arga, 3, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arbr, 3, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arbg, 3, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arbb, 3, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arba, 3, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arar, 3, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arag, 3, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arab, 3, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, araa, 3, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agrr, 3, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agrg, 3, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agrb, 3, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agra, 3, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aggr, 3, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aggg, 3, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aggb, 3, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agga, 3, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agbr, 3, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agbg, 3, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agbb, 3, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agba, 3, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agar, 3, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agag, 3, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agab, 3, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agaa, 3, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abrr, 3, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abrg, 3, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abrb, 3, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abra, 3, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abgr, 3, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abgg, 3, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abgb, 3, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abga, 3, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abbr, 3, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abbg, 3, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abbb, 3, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abba, 3, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abar, 3, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abag, 3, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abab, 3, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abaa, 3, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aarr, 3, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aarg, 3, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aarb, 3, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aara, 3, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aagr, 3, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aagg, 3, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aagb, 3, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaga, 3, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aabr, 3, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aabg, 3, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aabb, 3, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaba, 3, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaar, 3, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaag, 3, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaab, 3, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaaa, 3, 3, 3, 3) +#endif + +#define __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES \ + /* __swizzled_vec__ XYZW_ACCESS() const; */ \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N <= 4, x, 0) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 2 || N == 3 || N == 4, y, 1) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 3 || N == 4, z, 2) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 4, w, 3) \ + \ + /* __swizzled_vec__ RGBA_ACCESS() const; */ \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 4, r, 0) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 4, g, 1) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 4, b, 2) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 4, a, 3) \ + \ + /* __swizzled_vec__ INDEX_ACCESS() const; */ \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N > 0, s0, 0) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N > 1, s1, 1) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N > 2, s2, 2) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N > 2, s3, 3) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N > 4, s4, 4) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N > 4, s5, 5) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N > 4, s6, 6) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N > 4, s7, 7) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 16, s8, 8) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 16, s9, 9) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 16, sA, 10) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 16, sB, 11) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 16, sC, 12) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 16, sD, 13) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 16, sE, 14) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(N == 16, sF, 15) \ + \ + /* __swizzled_vec__ lo()/hi() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, lo, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, lo, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, lo, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, lo, 0, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, lo, 0, 1, 2, 3, 4, 5, 6, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, hi, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, hi, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, hi, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, hi, 4, 5, 6, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, hi, 8, 9, 10, 11, 12, 13, 14, 15) \ + /* __swizzled_vec__ odd()/even() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, odd, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, odd, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, odd, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, odd, 1, 3, 5, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, odd, 1, 3, 5, 7, 9, 11, 13, 15) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, even, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, even, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, even, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, even, 0, 2, 4, 6) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, even, 0, 2, 4, 6, 8, 10, 12, 14) \ + /* SYCL_SIMPLE_SWIZZLES */ \ + __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES + +#define __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST(COND, NAME, ...) \ + template \ + std::enable_if_t< \ + (COND), decltype(std::declval().template swizzle<__VA_ARGS__>())> \ + NAME() { \ + return static_cast(this)->template swizzle<__VA_ARGS__>(); \ + } + +#define __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, ...) \ + template \ + std::enable_if_t<(COND), decltype(std::declval() \ + .template swizzle<__VA_ARGS__>())> \ + NAME() const { \ + return static_cast(this)->template swizzle<__VA_ARGS__>(); \ + } + +#define __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS_NON_CONST(COND, NAME, INDEX) \ + template \ + std::enable_if_t<(COND), decltype(std::declval()[0])> NAME() { \ + return (*static_cast(this))[INDEX]; \ + } +#define __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS_CONST(COND, NAME, INDEX) \ + template \ + std::enable_if_t<(COND), decltype(std::declval()[0])> NAME() \ + const { \ + return (*static_cast(this))[INDEX]; \ + } + +template struct NamedSwizzlesMixinConst { +#define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ + __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) + +#define __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(COND, NAME, INDEX) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS_CONST(COND, NAME, INDEX) + + __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES + +#undef __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS +#undef __SYCL_SWIZZLE_MIXIN_METHOD +}; + +template struct NamedSwizzlesMixinBoth { +#define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ + __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST(COND, NAME, __VA_ARGS__) \ + __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) + +#define __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS(COND, NAME, INDEX) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS_NON_CONST(COND, NAME, INDEX) \ + __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS_CONST(COND, NAME, INDEX) + + __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES + +#undef __SYCL_SWIZLLE_MIXIN_SCALAR_ACCESS +#undef __SYCL_SWIZZLE_MIXIN_METHOD +}; + +#undef __SYCL_SWIZZLE_MIXIN_METHOD_CONST +#undef __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST + +#undef __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/spirv.hpp b/sycl/include/sycl/detail/spirv.hpp index 90436366a20ea..2e122df0decd6 100644 --- a/sycl/include/sycl/detail/spirv.hpp +++ b/sycl/include/sycl/detail/spirv.hpp @@ -871,11 +871,12 @@ EnableIfNativeShuffle Shuffle(GroupT g, T x, id<1> local_id) { return result; } else if constexpr (ext::oneapi::experimental::is_user_constructed_group_v< GroupT>) { - return __spirv_GroupNonUniformShuffle(group_scope::value, - convertToOpenCLType(x), LocalId); + return convertFromOpenCLTypeFor(__spirv_GroupNonUniformShuffle( + group_scope::value, convertToOpenCLType(x), LocalId)); } else { // Subgroup. - return __spirv_SubgroupShuffleINTEL(convertToOpenCLType(x), LocalId); + return convertFromOpenCLTypeFor( + __spirv_SubgroupShuffleINTEL(convertToOpenCLType(x), LocalId)); } #else if constexpr (ext::oneapi::experimental::is_user_constructed_group_v< @@ -908,12 +909,12 @@ EnableIfNativeShuffle ShuffleXor(GroupT g, T x, id<1> mask) { // general, and simple so we go with that. id<1> TargetLocalId = g.get_local_id() ^ mask; uint32_t TargetId = MapShuffleID(g, TargetLocalId); - return __spirv_GroupNonUniformShuffle(group_scope::value, - convertToOpenCLType(x), TargetId); + return convertFromOpenCLTypeFor(__spirv_GroupNonUniformShuffle( + group_scope::value, convertToOpenCLType(x), TargetId)); } else { // Subgroup. - return __spirv_SubgroupShuffleXorINTEL(convertToOpenCLType(x), - static_cast(mask.get(0))); + return convertFromOpenCLTypeFor(__spirv_SubgroupShuffleXorINTEL( + convertToOpenCLType(x), static_cast(mask.get(0)))); } #else if constexpr (ext::oneapi::experimental::is_user_constructed_group_v< @@ -956,12 +957,12 @@ EnableIfNativeShuffle ShuffleDown(GroupT g, T x, uint32_t delta) { if (TargetLocalId[0] + delta < g.get_local_linear_range()) TargetLocalId[0] += delta; uint32_t TargetId = MapShuffleID(g, TargetLocalId); - return __spirv_GroupNonUniformShuffle(group_scope::value, - convertToOpenCLType(x), TargetId); + return convertFromOpenCLTypeFor(__spirv_GroupNonUniformShuffle( + group_scope::value, convertToOpenCLType(x), TargetId)); } else { // Subgroup. - return __spirv_SubgroupShuffleDownINTEL(convertToOpenCLType(x), - convertToOpenCLType(x), delta); + return convertFromOpenCLTypeFor(__spirv_SubgroupShuffleDownINTEL( + convertToOpenCLType(x), convertToOpenCLType(x), delta)); } #else if constexpr (ext::oneapi::experimental::is_user_constructed_group_v< @@ -1000,12 +1001,12 @@ EnableIfNativeShuffle ShuffleUp(GroupT g, T x, uint32_t delta) { if (TargetLocalId[0] >= delta) TargetLocalId[0] -= delta; uint32_t TargetId = MapShuffleID(g, TargetLocalId); - return __spirv_GroupNonUniformShuffle(group_scope::value, - convertToOpenCLType(x), TargetId); + return convertFromOpenCLTypeFor(__spirv_GroupNonUniformShuffle( + group_scope::value, convertToOpenCLType(x), TargetId)); } else { // Subgroup. - return __spirv_SubgroupShuffleUpINTEL(convertToOpenCLType(x), - convertToOpenCLType(x), delta); + return convertFromOpenCLTypeFor(__spirv_SubgroupShuffleUpINTEL( + convertToOpenCLType(x), convertToOpenCLType(x), delta)); } #else if constexpr (ext::oneapi::experimental::is_user_constructed_group_v< diff --git a/sycl/include/sycl/detail/type_traits.hpp b/sycl/include/sycl/detail/type_traits.hpp index 0a20e51bdeec5..fdb2b49e448ee 100644 --- a/sycl/include/sycl/detail/type_traits.hpp +++ b/sycl/include/sycl/detail/type_traits.hpp @@ -25,9 +25,8 @@ template struct is_fixed_size_group : std::false_type {}; template inline constexpr bool is_fixed_size_group_v = is_fixed_size_group::value; -template class OperationCurrentT, int... Indexes> -class SwizzleOp; +template +class __SYCL_EBO Swizzle; } // namespace detail template class group; @@ -165,11 +164,9 @@ template struct get_elem_type_unqual> { template struct get_elem_type_unqual> { using type = T; }; -template class OperationCurrentT, int... Indexes> -struct get_elem_type_unqual> { - using type = typename get_elem_type_unqual>::type; +template +struct get_elem_type_unqual> { + using type = DataT; }; template struct make_signed { template struct make_signed> { using type = vec, N>; }; -template class OperationCurrentT, int... Indexes> -struct make_signed> { - using type = make_signed_t>; +template +struct make_signed> { + using type = vec, sizeof...(Indexes)>; }; template struct make_signed> { using type = marray, N>; @@ -270,11 +265,9 @@ template struct make_unsigned { template struct make_unsigned> { using type = vec, N>; }; -template class OperationCurrentT, int... Indexes> -struct make_unsigned> { - using type = make_unsigned_t>; +template +struct make_unsigned> { + using type = vec, sizeof...(Indexes)>; }; template struct make_unsigned> { using type = marray, N>; @@ -300,10 +293,9 @@ template struct get_vec_size> { // is_swizzle template struct is_swizzle : std::false_type {}; -template class OperationCurrentT, int... Indexes> -struct is_swizzle> : std::true_type {}; +template +struct is_swizzle> + : std::true_type {}; template constexpr bool is_swizzle_v = is_swizzle::value; diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp deleted file mode 100644 index bde6ce270afb5..0000000000000 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ /dev/null @@ -1,394 +0,0 @@ -//=== vector_arith.hpp --- Implementation of arithmetic ops on sycl::vec ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include // for half, cl_char, cl_int -#include // for is_sigeninteger, is_s... -#include // for is_contained -#include // for is_floating_point - -#include // bfloat16 - -#include -#include // for enable_if_t, is_same - -namespace sycl { -inline namespace _V1 { - -template class __SYCL_EBO vec; - -namespace detail { - -template class VecAccess; - -// Macros to populate binary operation on sycl::vec. -#if defined(__SYCL_BINOP) || defined(BINOP_BASE) -#error "Undefine __SYCL_BINOP and BINOP_BASE macro" -#endif - -#ifdef __SYCL_DEVICE_ONLY__ -#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const vec_t & Rhs) { \ - vec_t Ret; \ - if constexpr (vec_t::IsBfloat16) { \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] BINOP Rhs[I]; \ - } \ - } else { \ - auto ExtVecLhs = sycl::bit_cast(Lhs); \ - auto ExtVecRhs = sycl::bit_cast(Rhs); \ - Ret = vec(ExtVecLhs BINOP ExtVecRhs); \ - if constexpr (std::is_same_v && CONVERT) { \ - vec_arith_common::ConvertToDataT(Ret); \ - } \ - } \ - return Ret; \ - } -#else // __SYCL_DEVICE_ONLY__ - -#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const vec_t & Rhs) { \ - vec_t Ret{}; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] BINOP Rhs[I]; \ - } \ - return Ret; \ - } -#endif // __SYCL_DEVICE_ONLY__ - -#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT, COND) \ - BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const DataT & Rhs) { \ - return Lhs BINOP vec_t(Rhs); \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const DataT & Lhs, \ - const vec_t & Rhs) { \ - return vec_t(Lhs) BINOP Rhs; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> &operator OPASSIGN( \ - vec_t & Lhs, const vec_t & Rhs) { \ - Lhs = Lhs BINOP Rhs; \ - return Lhs; \ - } \ - template \ - friend std::enable_if_t<(Num != 1) && (COND), vec_t &> operator OPASSIGN( \ - vec_t & Lhs, const DataT & Rhs) { \ - Lhs = Lhs BINOP vec_t(Rhs); \ - return Lhs; \ - } - -/**************************************************************** - * vec_arith_common - * / | \ - * / | \ - * vec_arith vec_arith ... vec_arith - * \ | / - * \ | / - * sycl::vec - * - * vec_arith_common is the base class for vec_arith. It contains - * the common math operators of sycl::vec for all types. - * vec_arith is the derived class that contains the math operators - * specialized for certain types. sycl::vec inherits from vec_arith. - * *************************************************************/ -template class vec_arith_common; -template struct vec_helper; - -template -class vec_arith : public vec_arith_common { -protected: - using vec_t = vec; - using ocl_t = detail::select_cl_scalar_integral_signed_t; - template using vec_data = vec_helper; - - // operator!. - friend vec operator!(const vec_t &Rhs) { -#ifdef __SYCL_DEVICE_ONLY__ - if constexpr (!vec_t::IsBfloat16) { - auto extVec = sycl::bit_cast(Rhs); - vec Ret{ - (typename vec::vector_t) !extVec}; - return Ret; - } else -#endif // __SYCL_DEVICE_ONLY__ - { - vec Ret{}; - for (size_t I = 0; I < NumElements; ++I) { - // static_cast will work here as the output of ! operator is either 0 or - // -1. - Ret[I] = static_cast(-1 * (!Rhs[I])); - } - return Ret; - } - } - - // operator +. - friend vec_t operator+(const vec_t &Lhs) { -#ifdef __SYCL_DEVICE_ONLY__ - auto extVec = sycl::bit_cast(Lhs); - return vec_t{+extVec}; -#else - vec_t Ret{}; - for (size_t I = 0; I < NumElements; ++I) - Ret[I] = +Lhs[I]; - return Ret; -#endif - } - - // operator -. - friend vec_t operator-(const vec_t &Lhs) { - vec_t Ret{}; - if constexpr (vec_t::IsBfloat16) { - for (size_t I = 0; I < NumElements; I++) - Ret[I] = -Lhs[I]; - } else { -#ifndef __SYCL_DEVICE_ONLY__ - for (size_t I = 0; I < NumElements; ++I) - Ret[I] = -Lhs[I]; -#else - auto extVec = sycl::bit_cast(Lhs); - Ret = vec_t{-extVec}; - if constexpr (std::is_same_v) { - vec_arith_common::ConvertToDataT(Ret); - } -#endif - } - return Ret; - } - -// Unary operations on sycl::vec -// FIXME: Don't allow Unary operators on vec after -// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed. -#ifdef __SYCL_UOP -#error "Undefine __SYCL_UOP macro" -#endif -#define __SYCL_UOP(UOP, OPASSIGN) \ - friend vec_t &operator UOP(vec_t & Rhs) { \ - Rhs OPASSIGN DataT{1}; \ - return Rhs; \ - } \ - friend vec_t operator UOP(vec_t &Lhs, int) { \ - vec_t Ret(Lhs); \ - Lhs OPASSIGN DataT{1}; \ - return Ret; \ - } - - __SYCL_UOP(++, +=) - __SYCL_UOP(--, -=) -#undef __SYCL_UOP - - // The logical operations on scalar types results in 0/1, while for vec<>, - // logical operations should result in 0 and -1 (similar to OpenCL vectors). - // That's why, for vec, we need to invert the result of the logical - // operations since we store vec as scalar type on the device. -#if defined(__SYCL_RELLOGOP) || defined(RELLOGOP_BASE) -#error "Undefine __SYCL_RELLOGOP and RELLOGOP_BASE macro." -#endif - -#ifdef __SYCL_DEVICE_ONLY__ -#define RELLOGOP_BASE(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec_t & Lhs, const vec_t & Rhs) { \ - vec Ret{}; \ - /* ext_vector_type does not support bfloat16, so for these */ \ - /* we do element-by-element operation on the underlying std::array. */ \ - if constexpr (vec_t::IsBfloat16) { \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = static_cast(-(Lhs[I] RELLOGOP Rhs[I])); \ - } \ - } else { \ - auto ExtVecLhs = sycl::bit_cast(Lhs); \ - auto ExtVecRhs = sycl::bit_cast(Rhs); \ - /* Cast required to convert unsigned char ext_vec_type to */ \ - /* char ext_vec_type. */ \ - Ret = vec( \ - (typename vec::vector_t)( \ - ExtVecLhs RELLOGOP ExtVecRhs)); \ - /* For NumElements == 1, we use scalar instead of ext_vector_type. */ \ - if constexpr (NumElements == 1) { \ - Ret *= -1; \ - } \ - } \ - return Ret; \ - } -#else // __SYCL_DEVICE_ONLY__ -#define RELLOGOP_BASE(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec_t & Lhs, const vec_t & Rhs) { \ - vec Ret{}; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = static_cast(-(Lhs[I] RELLOGOP Rhs[I])); \ - } \ - return Ret; \ - } -#endif - -#define __SYCL_RELLOGOP(RELLOGOP, COND) \ - RELLOGOP_BASE(RELLOGOP, COND) \ - \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec_t & Lhs, const DataT & Rhs) { \ - return Lhs RELLOGOP vec_t(Rhs); \ - } \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const DataT & Lhs, const vec_t & Rhs) { \ - return vec_t(Lhs) RELLOGOP Rhs; \ - } - - // OP is: ==, !=, <, >, <=, >=, &&, || - // vec operatorOP(const vec &Rhs) const; - // vec operatorOP(const DataT &Rhs) const; - __SYCL_RELLOGOP(==, true) - __SYCL_RELLOGOP(!=, true) - __SYCL_RELLOGOP(>, true) - __SYCL_RELLOGOP(<, true) - __SYCL_RELLOGOP(>=, true) - __SYCL_RELLOGOP(<=, true) - - // Only available to integral types. - __SYCL_RELLOGOP(&&, (!detail::is_vgenfloat_v)) - __SYCL_RELLOGOP(||, (!detail::is_vgenfloat_v)) -#undef __SYCL_RELLOGOP -#undef RELLOGOP_BASE - - // Binary operations on sycl::vec<> for all types except std::byte. - __SYCL_BINOP(+, +=, true, true) - __SYCL_BINOP(-, -=, true, true) - __SYCL_BINOP(*, *=, false, true) - __SYCL_BINOP(/, /=, false, true) - - // The following OPs are available only when: DataT != cl_float && - // DataT != cl_double && DataT != cl_half && DataT != BF16. - __SYCL_BINOP(%, %=, false, (!detail::is_vgenfloat_v)) - // Bitwise operations are allowed for std::byte. - __SYCL_BINOP(|, |=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(&, &=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(^, ^=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(>>, >>=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(<<, <<=, true, (!detail::is_vgenfloat_v)) - - // friends - template friend class __SYCL_EBO vec; -}; // class vec_arith<> - -#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -template -class vec_arith - : public vec_arith_common { -protected: - // NumElements can never be zero. Still using the redundant check to avoid - // incomplete type errors. - using DataT = typename std::conditional_t; - using vec_t = vec; - template using vec_data = vec_helper; - - // Special <<, >> operators for std::byte. - // std::byte is not an arithmetic type and it only supports the following - // overloads of >> and << operators. - // - // 1 template - // constexpr std::byte operator<<( std::byte b, IntegerType shift ) - // noexcept; - friend vec_t operator<<(const vec_t &Lhs, int shift) { - vec_t Ret; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = Lhs[I] << shift; - } - return Ret; - } - friend vec_t &operator<<=(vec_t &Lhs, int shift) { - Lhs = Lhs << shift; - return Lhs; - } - - // 2 template - // constexpr std::byte operator>>( std::byte b, IntegerType shift ) - // noexcept; - friend vec_t operator>>(const vec_t &Lhs, int shift) { - vec_t Ret; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = Lhs[I] >> shift; - } - return Ret; - } - friend vec_t &operator>>=(vec_t &Lhs, int shift) { - Lhs = Lhs >> shift; - return Lhs; - } - - __SYCL_BINOP(|, |=, false, true) - __SYCL_BINOP(&, &=, false, true) - __SYCL_BINOP(^, ^=, false, true) - - // friends - template friend class __SYCL_EBO vec; -}; -#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - -template class vec_arith_common { -protected: - using vec_t = vec; - - static constexpr bool IsBfloat16 = - std::is_same_v; - - // operator~() available only when: dataT != float && dataT != double - // && dataT != half - template - friend std::enable_if_t, vec_t> - operator~(const vec_t &Rhs) { -#ifdef __SYCL_DEVICE_ONLY__ - auto extVec = sycl::bit_cast(Rhs); - vec_t Ret{~extVec}; - if constexpr (std::is_same_v) { - ConvertToDataT(Ret); - } - return Ret; -#else - vec_t Ret{}; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = ~Rhs[I]; - } - return Ret; -#endif - } - -#ifdef __SYCL_DEVICE_ONLY__ - using vec_bool_t = vec; - // Required only for std::bool. - static void ConvertToDataT(vec_bool_t &Ret) { - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = bit_cast(Ret[I]) != 0; - } - } -#endif - - // friends - template friend class __SYCL_EBO vec; -}; - -#undef __SYCL_BINOP -#undef BINOP_BASE - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp index c4bd584be80da..2feedb19e3698 100644 --- a/sycl/include/sycl/detail/vector_convert.hpp +++ b/sycl/include/sycl/detail/vector_convert.hpp @@ -885,6 +885,24 @@ using ConvertBoolAndByteT = template template vec vec::convert() const { + auto getValue = [this](int Index) { + using RetType = + typename std::conditional_t, int8_t, +#ifdef __SYCL_DEVICE_ONLY__ + detail::element_type_for_vector_t +#else + DataT +#endif + >; + +#ifdef __SYCL_DEVICE_ONLY__ + if constexpr (std::is_same_v) + return sycl::bit_cast(m_Data[Index]); + else +#endif + return static_cast(m_Data[Index]); + }; + using T = detail::ConvertBoolAndByteT; using R = detail::ConvertBoolAndByteT; using bfloat16 = sycl::ext::oneapi::bfloat16; diff --git a/sycl/include/sycl/ext/oneapi/dot_product.hpp b/sycl/include/sycl/ext/oneapi/dot_product.hpp index 4fda07052e25a..6f6a8e20e4d55 100644 --- a/sycl/include/sycl/ext/oneapi/dot_product.hpp +++ b/sycl/include/sycl/ext/oneapi/dot_product.hpp @@ -54,24 +54,30 @@ int32_t dot_acc(uint32_t pa, int32_t pb, int32_t c) { c; } +namespace detail { +template +int32_t dot_acc(vec a, vec b, int32_t c) { + int32_t res = c; + for (int i = 0; i < 4; ++i) + res += a[i] * b[i]; + return res; +} +} // namespace detail + int32_t dot_acc(vec a, vec b, int32_t c) { - return a.s0() * b.s0() + a.s1() * b.s1() + a.s2() * b.s2() + a.s3() * b.s3() + - c; + return detail::dot_acc(a, b, c); } int32_t dot_acc(vec a, vec b, int32_t c) { - return a.s0() * b.s0() + a.s1() * b.s1() + a.s2() * b.s2() + a.s3() * b.s3() + - c; + return detail::dot_acc(a, b, c); } int32_t dot_acc(vec a, vec b, int32_t c) { - return a.s0() * b.s0() + a.s1() * b.s1() + a.s2() * b.s2() + a.s3() * b.s3() + - c; + return detail::dot_acc(a, b, c); } int32_t dot_acc(vec a, vec b, int32_t c) { - return a.s0() * b.s0() + a.s1() * b.s1() + a.s2() * b.s2() + a.s3() * b.s3() + - c; + return detail::dot_acc(a, b, c); } } // namespace ext::oneapi diff --git a/sycl/include/sycl/half_type.hpp b/sycl/include/sycl/half_type.hpp index 5cf4a4023afed..06f0b38bb07fc 100644 --- a/sycl/include/sycl/half_type.hpp +++ b/sycl/include/sycl/half_type.hpp @@ -8,13 +8,17 @@ #pragma once +#if defined(__SYCL_VEC_STANDALONE) +#include +// use sycl::bit_cast from vector.hpp +#else #include // for bit_cast -#include // for __SYCL_EXPORT #include // for istream, ostream #ifdef __SYCL_DEVICE_ONLY__ #include #endif +#endif #include // for size_t #include // for uint16_t, uint32_t, uint8_t @@ -182,7 +186,7 @@ struct RawHostHalfToken { uint16_t Value; }; -#ifndef __SYCL_DEVICE_ONLY__ +#if !defined(__SYCL_DEVICE_ONLY__) || defined(__SYCL_VEC_STANDALONE) class half { #else class [[__sycl_detail__::__uses_aspects__(aspect::fp16)]] half { diff --git a/sycl/include/sycl/stream.hpp b/sycl/include/sycl/stream.hpp index f3c70098a3b18..ec7fb8c7aad52 100644 --- a/sycl/include/sycl/stream.hpp +++ b/sycl/include/sycl/stream.hpp @@ -30,7 +30,7 @@ #include // for property_list #include // for range #include // for multi_ptr -#include // for vec, SwizzleOp +#include // for vec, Swizzle #include // for size_t, byte #include // for hash, shared_ptr @@ -748,21 +748,18 @@ inline void writeHItem(GlobalBufAccessorT &GlobalFlushBuf, write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len); } -template struct IsSwizzleOp : std::false_type {}; +template struct IsSwizzle : std::false_type {}; -template class OperationCurrentT, int... Indexes> -struct IsSwizzleOp> +template +struct IsSwizzle> : std::true_type { - using T = typename VecT::element_type; + using T = DataT; using Type = typename sycl::vec; }; template using EnableIfSwizzleVec = - typename std::enable_if_t::value, - typename IsSwizzleOp::Type>; + typename std::enable_if_t::value, typename IsSwizzle::Type>; } // namespace detail diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index ad94f12108bce..139a7fb91b34a 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -81,7 +81,7 @@ vec load(const multi_ptr src) { using VecT = sycl::detail::ConvertToOpenCLType_t>; VecT Ret = __spirv_SubgroupBlockReadINTEL(convertToBlockPtr(src)); - return sycl::bit_cast::vector_t>(Ret); + return vec{sycl::bit_cast::vector_t>(Ret)}; } template and once for SwizzleOp<>. - -// TODO: exclude L-Value swizzle like vec.xxxx() - -#if defined(__SYCL_ACCESS) || defined(__SYCL_INDEXER) || \ - defined(__SYCL_EXPAND) || defined(__SYCL_NTH_ARG) || \ - defined(__SYCL_E0) || defined(__SYCL_E1) || defined(__SYCL_E2) || \ - defined(__SYCL_E3) || defined(__SYCL_E4) || defined(__SYCL_E5) || \ - defined(__SYCL_E6) || defined(__SYCL_E7) || defined(__SYCL_E8) -#error "Undefine __SYCL_{ACCESS, INDEXER, EXPAND, NTH_ARG, E[0-8]} macros." -#endif - -#define __SYCL_INDEXER(_X) Indexer<_X>::value - -// Accepts any number of args >= _N, but expands to just the _N-th one. -// Make N equal to the max number of args handled + 1. Here, N == 9. -#define __SYCL_NTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _N, ...) _N - -// Some compilers for such defines as '#define M1(_A, ...) M2(__VA_ARGS__)' and -// expression M1(1, 2, 3, 4) may pass (2, 3, 4) to M2 as one argument, -// not as three. To avoid that the following macro is needed. -// So, defining M1 as '#define M1(_A, ...) __SYCL_EXPAND( M2(__VA_ARGS__) )' -// solves the problem. -#define __SYCL_EXPAND(_X) _X - -// These are helper macros to create overrides based on the arity. -#define __SYCL_E0(_M, ...) -#define __SYCL_E1(_M, _X) _M(_X) -#define __SYCL_E2(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E1(_M, __VA_ARGS__)) -#define __SYCL_E3(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E2(_M, __VA_ARGS__)) -#define __SYCL_E4(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E3(_M, __VA_ARGS__)) -#define __SYCL_E5(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E4(_M, __VA_ARGS__)) -#define __SYCL_E6(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E5(_M, __VA_ARGS__)) -#define __SYCL_E7(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E6(_M, __VA_ARGS__)) -#define __SYCL_E8(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E7(_M, __VA_ARGS__)) - -// Creates template functions with required number of parameters. -// Each of the parameters X from __VA_ARGS__ is transformed into -// __SYCL_INDEXER(X). -// Currently the max number of parameters handled by this macro is 8. -#define __SYCL_ACCESS(_COND, _NAME, ...) \ - template \ - typename std::enable_if<(_COND), Swizzle< \ - __SYCL_EXPAND(__SYCL_NTH_ARG(__VA_ARGS__, \ - __SYCL_E8, __SYCL_E7, __SYCL_E6, __SYCL_E5, __SYCL_E4, \ - __SYCL_E3, __SYCL_E2, __SYCL_E1, __SYCL_E0) \ - (__SYCL_INDEXER, __VA_ARGS__))>>::type _NAME() { \ - return __SYCL_ACCESS_RETURN; \ - } \ - template \ - typename std::enable_if<(_COND), ConstSwizzle< \ - __SYCL_EXPAND(__SYCL_NTH_ARG(__VA_ARGS__, \ - __SYCL_E8, __SYCL_E7, __SYCL_E6, __SYCL_E5, __SYCL_E4, \ - __SYCL_E3, __SYCL_E2, __SYCL_E1, __SYCL_E0) \ - (__SYCL_INDEXER, __VA_ARGS__))>>::type _NAME() const { \ - return __SYCL_ACCESS_RETURN; \ - } - -#define __SYCL_SCALAR_ACCESS(_COND, _NAME, _INDEX) \ - template \ - typename std::enable_if<(_COND), DataT &>::type _NAME() { \ - return (*__SYCL_ACCESS_RETURN)[_INDEX]; \ - } \ - template \ - typename std::enable_if<(_COND), const DataT &>::type _NAME() const { \ - return (*__SYCL_ACCESS_RETURN)[_INDEX]; \ - } - -//__swizzled_vec__ XYZW_ACCESS() const; -__SYCL_SCALAR_ACCESS(N <= 4, x, 0) -__SYCL_SCALAR_ACCESS(N == 2 || N == 3 || N == 4, y, 1) -__SYCL_SCALAR_ACCESS(N == 3 || N == 4, z, 2) -__SYCL_SCALAR_ACCESS(N == 4, w, 3) - -//__swizzled_vec__ RGBA_ACCESS() const; -__SYCL_SCALAR_ACCESS(N == 4, r, 0) -__SYCL_SCALAR_ACCESS(N == 4, g, 1) -__SYCL_SCALAR_ACCESS(N == 4, b, 2) -__SYCL_SCALAR_ACCESS(N == 4, a, 3) - -//__swizzled_vec__ INDEX_ACCESS() const; -__SYCL_SCALAR_ACCESS(N > 0, s0, 0) -__SYCL_SCALAR_ACCESS(N > 1, s1, 1) -__SYCL_SCALAR_ACCESS(N > 2, s2, 2) -__SYCL_SCALAR_ACCESS(N > 2, s3, 3) -__SYCL_SCALAR_ACCESS(N > 4, s4, 4) -__SYCL_SCALAR_ACCESS(N > 4, s5, 5) -__SYCL_SCALAR_ACCESS(N > 4, s6, 6) -__SYCL_SCALAR_ACCESS(N > 4, s7, 7) -__SYCL_SCALAR_ACCESS(N == 16, s8, 8) -__SYCL_SCALAR_ACCESS(N == 16, s9, 9) -__SYCL_SCALAR_ACCESS(N == 16, sA, 10) -__SYCL_SCALAR_ACCESS(N == 16, sB, 11) -__SYCL_SCALAR_ACCESS(N == 16, sC, 12) -__SYCL_SCALAR_ACCESS(N == 16, sD, 13) -__SYCL_SCALAR_ACCESS(N == 16, sE, 14) -__SYCL_SCALAR_ACCESS(N == 16, sF, 15) - -#ifdef SYCL_SIMPLE_SWIZZLES -//__swizzled_vec__ XYZW_SWIZZLE() const; -__SYCL_ACCESS(N <= 4, xx, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xy, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xz, 0, 2) -__SYCL_ACCESS(N == 4, xw, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yx, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yy, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yz, 1, 2) -__SYCL_ACCESS(N == 4, yw, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zx, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zy, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zz, 2, 2) -__SYCL_ACCESS(N == 4, zw, 2, 3) -__SYCL_ACCESS(N == 4, wx, 3, 0) -__SYCL_ACCESS(N == 4, wy, 3, 1) -__SYCL_ACCESS(N == 4, wz, 3, 2) -__SYCL_ACCESS(N == 4, ww, 3, 3) -__SYCL_ACCESS(N <= 4, xxx, 0, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xxy, 0, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xxz, 0, 0, 2) -__SYCL_ACCESS(N == 4, xxw, 0, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, xyx, 0, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xyy, 0, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, xyz, 0, 1, 2) -__SYCL_ACCESS(N == 4, xyw, 0, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, xzx, 0, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, xzy, 0, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, xzz, 0, 2, 2) -__SYCL_ACCESS(N == 4, xzw, 0, 2, 3) -__SYCL_ACCESS(N == 4, xwx, 0, 3, 0) -__SYCL_ACCESS(N == 4, xwy, 0, 3, 1) -__SYCL_ACCESS(N == 4, xwz, 0, 3, 2) -__SYCL_ACCESS(N == 4, xww, 0, 3, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yxx, 1, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yxy, 1, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, yxz, 1, 0, 2) -__SYCL_ACCESS(N == 4, yxw, 1, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yyx, 1, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yyy, 1, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yyz, 1, 1, 2) -__SYCL_ACCESS(N == 4, yyw, 1, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, yzx, 1, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, yzy, 1, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, yzz, 1, 2, 2) -__SYCL_ACCESS(N == 4, yzw, 1, 2, 3) -__SYCL_ACCESS(N == 4, ywx, 1, 3, 0) -__SYCL_ACCESS(N == 4, ywy, 1, 3, 1) -__SYCL_ACCESS(N == 4, ywz, 1, 3, 2) -__SYCL_ACCESS(N == 4, yww, 1, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, zxx, 2, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, zxy, 2, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, zxz, 2, 0, 2) -__SYCL_ACCESS(N == 4, zxw, 2, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, zyx, 2, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, zyy, 2, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, zyz, 2, 1, 2) -__SYCL_ACCESS(N == 4, zyw, 2, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zzx, 2, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zzy, 2, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zzz, 2, 2, 2) -__SYCL_ACCESS(N == 4, zzw, 2, 2, 3) -__SYCL_ACCESS(N == 4, zwx, 2, 3, 0) -__SYCL_ACCESS(N == 4, zwy, 2, 3, 1) -__SYCL_ACCESS(N == 4, zwz, 2, 3, 2) -__SYCL_ACCESS(N == 4, zww, 2, 3, 3) -__SYCL_ACCESS(N == 4, wxx, 3, 0, 0) -__SYCL_ACCESS(N == 4, wxy, 3, 0, 1) -__SYCL_ACCESS(N == 4, wxz, 3, 0, 2) -__SYCL_ACCESS(N == 4, wxw, 3, 0, 3) -__SYCL_ACCESS(N == 4, wyx, 3, 1, 0) -__SYCL_ACCESS(N == 4, wyy, 3, 1, 1) -__SYCL_ACCESS(N == 4, wyz, 3, 1, 2) -__SYCL_ACCESS(N == 4, wyw, 3, 1, 3) -__SYCL_ACCESS(N == 4, wzx, 3, 2, 0) -__SYCL_ACCESS(N == 4, wzy, 3, 2, 1) -__SYCL_ACCESS(N == 4, wzz, 3, 2, 2) -__SYCL_ACCESS(N == 4, wzw, 3, 2, 3) -__SYCL_ACCESS(N == 4, wwx, 3, 3, 0) -__SYCL_ACCESS(N == 4, wwy, 3, 3, 1) -__SYCL_ACCESS(N == 4, wwz, 3, 3, 2) -__SYCL_ACCESS(N == 4, www, 3, 3, 3) -__SYCL_ACCESS(N <= 4, xxxx, 0, 0, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xxxy, 0, 0, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xxxz, 0, 0, 0, 2) -__SYCL_ACCESS(N == 4, xxxw, 0, 0, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, xxyx, 0, 0, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xxyy, 0, 0, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, xxyz, 0, 0, 1, 2) -__SYCL_ACCESS(N == 4, xxyw, 0, 0, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, xxzx, 0, 0, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, xxzy, 0, 0, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, xxzz, 0, 0, 2, 2) -__SYCL_ACCESS(N == 4, xxzw, 0, 0, 2, 3) -__SYCL_ACCESS(N == 4, xxwx, 0, 0, 3, 0) -__SYCL_ACCESS(N == 4, xxwy, 0, 0, 3, 1) -__SYCL_ACCESS(N == 4, xxwz, 0, 0, 3, 2) -__SYCL_ACCESS(N == 4, xxww, 0, 0, 3, 3) -__SYCL_ACCESS(2 <= N && N <= 4, xyxx, 0, 1, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xyxy, 0, 1, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xyxz, 0, 1, 0, 2) -__SYCL_ACCESS(N == 4, xyxw, 0, 1, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, xyyx, 0, 1, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xyyy, 0, 1, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, xyyz, 0, 1, 1, 2) -__SYCL_ACCESS(N == 4, xyyw, 0, 1, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, xyzx, 0, 1, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, xyzy, 0, 1, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, xyzz, 0, 1, 2, 2) -__SYCL_ACCESS(N == 4, xyzw, 0, 1, 2, 3) -__SYCL_ACCESS(N == 4, xywx, 0, 1, 3, 0) -__SYCL_ACCESS(N == 4, xywy, 0, 1, 3, 1) -__SYCL_ACCESS(N == 4, xywz, 0, 1, 3, 2) -__SYCL_ACCESS(N == 4, xyww, 0, 1, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, xzxx, 0, 2, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, xzxy, 0, 2, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xzxz, 0, 2, 0, 2) -__SYCL_ACCESS(N == 4, xzxw, 0, 2, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, xzyx, 0, 2, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, xzyy, 0, 2, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, xzyz, 0, 2, 1, 2) -__SYCL_ACCESS(N == 4, xzyw, 0, 2, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, xzzx, 0, 2, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, xzzy, 0, 2, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, xzzz, 0, 2, 2, 2) -__SYCL_ACCESS(N == 4, xzzw, 0, 2, 2, 3) -__SYCL_ACCESS(N == 4, xzwx, 0, 2, 3, 0) -__SYCL_ACCESS(N == 4, xzwy, 0, 2, 3, 1) -__SYCL_ACCESS(N == 4, xzwz, 0, 2, 3, 2) -__SYCL_ACCESS(N == 4, xzww, 0, 2, 3, 3) -__SYCL_ACCESS(N == 4, xwxx, 0, 3, 0, 0) -__SYCL_ACCESS(N == 4, xwxy, 0, 3, 0, 1) -__SYCL_ACCESS(N == 4, xwxz, 0, 3, 0, 2) -__SYCL_ACCESS(N == 4, xwxw, 0, 3, 0, 3) -__SYCL_ACCESS(N == 4, xwyx, 0, 3, 1, 0) -__SYCL_ACCESS(N == 4, xwyy, 0, 3, 1, 1) -__SYCL_ACCESS(N == 4, xwyz, 0, 3, 1, 2) -__SYCL_ACCESS(N == 4, xwyw, 0, 3, 1, 3) -__SYCL_ACCESS(N == 4, xwzx, 0, 3, 2, 0) -__SYCL_ACCESS(N == 4, xwzy, 0, 3, 2, 1) -__SYCL_ACCESS(N == 4, xwzz, 0, 3, 2, 2) -__SYCL_ACCESS(N == 4, xwzw, 0, 3, 2, 3) -__SYCL_ACCESS(N == 4, xwwx, 0, 3, 3, 0) -__SYCL_ACCESS(N == 4, xwwy, 0, 3, 3, 1) -__SYCL_ACCESS(N == 4, xwwz, 0, 3, 3, 2) -__SYCL_ACCESS(N == 4, xwww, 0, 3, 3, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yxxx, 1, 0, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yxxy, 1, 0, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, yxxz, 1, 0, 0, 2) -__SYCL_ACCESS(N == 4, yxxw, 1, 0, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yxyx, 1, 0, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yxyy, 1, 0, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yxyz, 1, 0, 1, 2) -__SYCL_ACCESS(N == 4, yxyw, 1, 0, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, yxzx, 1, 0, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, yxzy, 1, 0, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, yxzz, 1, 0, 2, 2) -__SYCL_ACCESS(N == 4, yxzw, 1, 0, 2, 3) -__SYCL_ACCESS(N == 4, yxwx, 1, 0, 3, 0) -__SYCL_ACCESS(N == 4, yxwy, 1, 0, 3, 1) -__SYCL_ACCESS(N == 4, yxwz, 1, 0, 3, 2) -__SYCL_ACCESS(N == 4, yxww, 1, 0, 3, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yyxx, 1, 1, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yyxy, 1, 1, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, yyxz, 1, 1, 0, 2) -__SYCL_ACCESS(N == 4, yyxw, 1, 1, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yyyx, 1, 1, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yyyy, 1, 1, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yyyz, 1, 1, 1, 2) -__SYCL_ACCESS(N == 4, yyyw, 1, 1, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, yyzx, 1, 1, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, yyzy, 1, 1, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, yyzz, 1, 1, 2, 2) -__SYCL_ACCESS(N == 4, yyzw, 1, 1, 2, 3) -__SYCL_ACCESS(N == 4, yywx, 1, 1, 3, 0) -__SYCL_ACCESS(N == 4, yywy, 1, 1, 3, 1) -__SYCL_ACCESS(N == 4, yywz, 1, 1, 3, 2) -__SYCL_ACCESS(N == 4, yyww, 1, 1, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, yzxx, 1, 2, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, yzxy, 1, 2, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, yzxz, 1, 2, 0, 2) -__SYCL_ACCESS(N == 4, yzxw, 1, 2, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, yzyx, 1, 2, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, yzyy, 1, 2, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yzyz, 1, 2, 1, 2) -__SYCL_ACCESS(N == 4, yzyw, 1, 2, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, yzzx, 1, 2, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, yzzy, 1, 2, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, yzzz, 1, 2, 2, 2) -__SYCL_ACCESS(N == 4, yzzw, 1, 2, 2, 3) -__SYCL_ACCESS(N == 4, yzwx, 1, 2, 3, 0) -__SYCL_ACCESS(N == 4, yzwy, 1, 2, 3, 1) -__SYCL_ACCESS(N == 4, yzwz, 1, 2, 3, 2) -__SYCL_ACCESS(N == 4, yzww, 1, 2, 3, 3) -__SYCL_ACCESS(N == 4, ywxx, 1, 3, 0, 0) -__SYCL_ACCESS(N == 4, ywxy, 1, 3, 0, 1) -__SYCL_ACCESS(N == 4, ywxz, 1, 3, 0, 2) -__SYCL_ACCESS(N == 4, ywxw, 1, 3, 0, 3) -__SYCL_ACCESS(N == 4, ywyx, 1, 3, 1, 0) -__SYCL_ACCESS(N == 4, ywyy, 1, 3, 1, 1) -__SYCL_ACCESS(N == 4, ywyz, 1, 3, 1, 2) -__SYCL_ACCESS(N == 4, ywyw, 1, 3, 1, 3) -__SYCL_ACCESS(N == 4, ywzx, 1, 3, 2, 0) -__SYCL_ACCESS(N == 4, ywzy, 1, 3, 2, 1) -__SYCL_ACCESS(N == 4, ywzz, 1, 3, 2, 2) -__SYCL_ACCESS(N == 4, ywzw, 1, 3, 2, 3) -__SYCL_ACCESS(N == 4, ywwx, 1, 3, 3, 0) -__SYCL_ACCESS(N == 4, ywwy, 1, 3, 3, 1) -__SYCL_ACCESS(N == 4, ywwz, 1, 3, 3, 2) -__SYCL_ACCESS(N == 4, ywww, 1, 3, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, zxxx, 2, 0, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, zxxy, 2, 0, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, zxxz, 2, 0, 0, 2) -__SYCL_ACCESS(N == 4, zxxw, 2, 0, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, zxyx, 2, 0, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, zxyy, 2, 0, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, zxyz, 2, 0, 1, 2) -__SYCL_ACCESS(N == 4, zxyw, 2, 0, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zxzx, 2, 0, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zxzy, 2, 0, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zxzz, 2, 0, 2, 2) -__SYCL_ACCESS(N == 4, zxzw, 2, 0, 2, 3) -__SYCL_ACCESS(N == 4, zxwx, 2, 0, 3, 0) -__SYCL_ACCESS(N == 4, zxwy, 2, 0, 3, 1) -__SYCL_ACCESS(N == 4, zxwz, 2, 0, 3, 2) -__SYCL_ACCESS(N == 4, zxww, 2, 0, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, zyxx, 2, 1, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, zyxy, 2, 1, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, zyxz, 2, 1, 0, 2) -__SYCL_ACCESS(N == 4, zyxw, 2, 1, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, zyyx, 2, 1, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, zyyy, 2, 1, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, zyyz, 2, 1, 1, 2) -__SYCL_ACCESS(N == 4, zyyw, 2, 1, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zyzx, 2, 1, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zyzy, 2, 1, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zyzz, 2, 1, 2, 2) -__SYCL_ACCESS(N == 4, zyzw, 2, 1, 2, 3) -__SYCL_ACCESS(N == 4, zywx, 2, 1, 3, 0) -__SYCL_ACCESS(N == 4, zywy, 2, 1, 3, 1) -__SYCL_ACCESS(N == 4, zywz, 2, 1, 3, 2) -__SYCL_ACCESS(N == 4, zyww, 2, 1, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, zzxx, 2, 2, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, zzxy, 2, 2, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, zzxz, 2, 2, 0, 2) -__SYCL_ACCESS(N == 4, zzxw, 2, 2, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, zzyx, 2, 2, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, zzyy, 2, 2, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, zzyz, 2, 2, 1, 2) -__SYCL_ACCESS(N == 4, zzyw, 2, 2, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zzzx, 2, 2, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zzzy, 2, 2, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zzzz, 2, 2, 2, 2) -__SYCL_ACCESS(N == 4, zzzw, 2, 2, 2, 3) -__SYCL_ACCESS(N == 4, zzwx, 2, 2, 3, 0) -__SYCL_ACCESS(N == 4, zzwy, 2, 2, 3, 1) -__SYCL_ACCESS(N == 4, zzwz, 2, 2, 3, 2) -__SYCL_ACCESS(N == 4, zzww, 2, 2, 3, 3) -__SYCL_ACCESS(N == 4, zwxx, 2, 3, 0, 0) -__SYCL_ACCESS(N == 4, zwxy, 2, 3, 0, 1) -__SYCL_ACCESS(N == 4, zwxz, 2, 3, 0, 2) -__SYCL_ACCESS(N == 4, zwxw, 2, 3, 0, 3) -__SYCL_ACCESS(N == 4, zwyx, 2, 3, 1, 0) -__SYCL_ACCESS(N == 4, zwyy, 2, 3, 1, 1) -__SYCL_ACCESS(N == 4, zwyz, 2, 3, 1, 2) -__SYCL_ACCESS(N == 4, zwyw, 2, 3, 1, 3) -__SYCL_ACCESS(N == 4, zwzx, 2, 3, 2, 0) -__SYCL_ACCESS(N == 4, zwzy, 2, 3, 2, 1) -__SYCL_ACCESS(N == 4, zwzz, 2, 3, 2, 2) -__SYCL_ACCESS(N == 4, zwzw, 2, 3, 2, 3) -__SYCL_ACCESS(N == 4, zwwx, 2, 3, 3, 0) -__SYCL_ACCESS(N == 4, zwwy, 2, 3, 3, 1) -__SYCL_ACCESS(N == 4, zwwz, 2, 3, 3, 2) -__SYCL_ACCESS(N == 4, zwww, 2, 3, 3, 3) -__SYCL_ACCESS(N == 4, wxxx, 3, 0, 0, 0) -__SYCL_ACCESS(N == 4, wxxy, 3, 0, 0, 1) -__SYCL_ACCESS(N == 4, wxxz, 3, 0, 0, 2) -__SYCL_ACCESS(N == 4, wxxw, 3, 0, 0, 3) -__SYCL_ACCESS(N == 4, wxyx, 3, 0, 1, 0) -__SYCL_ACCESS(N == 4, wxyy, 3, 0, 1, 1) -__SYCL_ACCESS(N == 4, wxyz, 3, 0, 1, 2) -__SYCL_ACCESS(N == 4, wxyw, 3, 0, 1, 3) -__SYCL_ACCESS(N == 4, wxzx, 3, 0, 2, 0) -__SYCL_ACCESS(N == 4, wxzy, 3, 0, 2, 1) -__SYCL_ACCESS(N == 4, wxzz, 3, 0, 2, 2) -__SYCL_ACCESS(N == 4, wxzw, 3, 0, 2, 3) -__SYCL_ACCESS(N == 4, wxwx, 3, 0, 3, 0) -__SYCL_ACCESS(N == 4, wxwy, 3, 0, 3, 1) -__SYCL_ACCESS(N == 4, wxwz, 3, 0, 3, 2) -__SYCL_ACCESS(N == 4, wxww, 3, 0, 3, 3) -__SYCL_ACCESS(N == 4, wyxx, 3, 1, 0, 0) -__SYCL_ACCESS(N == 4, wyxy, 3, 1, 0, 1) -__SYCL_ACCESS(N == 4, wyxz, 3, 1, 0, 2) -__SYCL_ACCESS(N == 4, wyxw, 3, 1, 0, 3) -__SYCL_ACCESS(N == 4, wyyx, 3, 1, 1, 0) -__SYCL_ACCESS(N == 4, wyyy, 3, 1, 1, 1) -__SYCL_ACCESS(N == 4, wyyz, 3, 1, 1, 2) -__SYCL_ACCESS(N == 4, wyyw, 3, 1, 1, 3) -__SYCL_ACCESS(N == 4, wyzx, 3, 1, 2, 0) -__SYCL_ACCESS(N == 4, wyzy, 3, 1, 2, 1) -__SYCL_ACCESS(N == 4, wyzz, 3, 1, 2, 2) -__SYCL_ACCESS(N == 4, wyzw, 3, 1, 2, 3) -__SYCL_ACCESS(N == 4, wywx, 3, 1, 3, 0) -__SYCL_ACCESS(N == 4, wywy, 3, 1, 3, 1) -__SYCL_ACCESS(N == 4, wywz, 3, 1, 3, 2) -__SYCL_ACCESS(N == 4, wyww, 3, 1, 3, 3) -__SYCL_ACCESS(N == 4, wzxx, 3, 2, 0, 0) -__SYCL_ACCESS(N == 4, wzxy, 3, 2, 0, 1) -__SYCL_ACCESS(N == 4, wzxz, 3, 2, 0, 2) -__SYCL_ACCESS(N == 4, wzxw, 3, 2, 0, 3) -__SYCL_ACCESS(N == 4, wzyx, 3, 2, 1, 0) -__SYCL_ACCESS(N == 4, wzyy, 3, 2, 1, 1) -__SYCL_ACCESS(N == 4, wzyz, 3, 2, 1, 2) -__SYCL_ACCESS(N == 4, wzyw, 3, 2, 1, 3) -__SYCL_ACCESS(N == 4, wzzx, 3, 2, 2, 0) -__SYCL_ACCESS(N == 4, wzzy, 3, 2, 2, 1) -__SYCL_ACCESS(N == 4, wzzz, 3, 2, 2, 2) -__SYCL_ACCESS(N == 4, wzzw, 3, 2, 2, 3) -__SYCL_ACCESS(N == 4, wzwx, 3, 2, 3, 0) -__SYCL_ACCESS(N == 4, wzwy, 3, 2, 3, 1) -__SYCL_ACCESS(N == 4, wzwz, 3, 2, 3, 2) -__SYCL_ACCESS(N == 4, wzww, 3, 2, 3, 3) -__SYCL_ACCESS(N == 4, wwxx, 3, 3, 0, 0) -__SYCL_ACCESS(N == 4, wwxy, 3, 3, 0, 1) -__SYCL_ACCESS(N == 4, wwxz, 3, 3, 0, 2) -__SYCL_ACCESS(N == 4, wwxw, 3, 3, 0, 3) -__SYCL_ACCESS(N == 4, wwyx, 3, 3, 1, 0) -__SYCL_ACCESS(N == 4, wwyy, 3, 3, 1, 1) -__SYCL_ACCESS(N == 4, wwyz, 3, 3, 1, 2) -__SYCL_ACCESS(N == 4, wwyw, 3, 3, 1, 3) -__SYCL_ACCESS(N == 4, wwzx, 3, 3, 2, 0) -__SYCL_ACCESS(N == 4, wwzy, 3, 3, 2, 1) -__SYCL_ACCESS(N == 4, wwzz, 3, 3, 2, 2) -__SYCL_ACCESS(N == 4, wwzw, 3, 3, 2, 3) -__SYCL_ACCESS(N == 4, wwwx, 3, 3, 3, 0) -__SYCL_ACCESS(N == 4, wwwy, 3, 3, 3, 1) -__SYCL_ACCESS(N == 4, wwwz, 3, 3, 3, 2) -__SYCL_ACCESS(N == 4, wwww, 3, 3, 3, 3) - -//__swizzled_vec__ RGBA_SWIZZLE() const; -__SYCL_ACCESS(N == 4, rr, 0, 0) -__SYCL_ACCESS(N == 4, rg, 0, 1) -__SYCL_ACCESS(N == 4, rb, 0, 2) -__SYCL_ACCESS(N == 4, ra, 0, 3) -__SYCL_ACCESS(N == 4, gr, 1, 0) -__SYCL_ACCESS(N == 4, gg, 1, 1) -__SYCL_ACCESS(N == 4, gb, 1, 2) -__SYCL_ACCESS(N == 4, ga, 1, 3) -__SYCL_ACCESS(N == 4, br, 2, 0) -__SYCL_ACCESS(N == 4, bg, 2, 1) -__SYCL_ACCESS(N == 4, bb, 2, 2) -__SYCL_ACCESS(N == 4, ba, 2, 3) -__SYCL_ACCESS(N == 4, ar, 3, 0) -__SYCL_ACCESS(N == 4, ag, 3, 1) -__SYCL_ACCESS(N == 4, ab, 3, 2) -__SYCL_ACCESS(N == 4, aa, 3, 3) -__SYCL_ACCESS(N == 4, rrr, 0, 0, 0) -__SYCL_ACCESS(N == 4, rrg, 0, 0, 1) -__SYCL_ACCESS(N == 4, rrb, 0, 0, 2) -__SYCL_ACCESS(N == 4, rra, 0, 0, 3) -__SYCL_ACCESS(N == 4, rgr, 0, 1, 0) -__SYCL_ACCESS(N == 4, rgg, 0, 1, 1) -__SYCL_ACCESS(N == 4, rgb, 0, 1, 2) -__SYCL_ACCESS(N == 4, rga, 0, 1, 3) -__SYCL_ACCESS(N == 4, rbr, 0, 2, 0) -__SYCL_ACCESS(N == 4, rbg, 0, 2, 1) -__SYCL_ACCESS(N == 4, rbb, 0, 2, 2) -__SYCL_ACCESS(N == 4, rba, 0, 2, 3) -__SYCL_ACCESS(N == 4, rar, 0, 3, 0) -__SYCL_ACCESS(N == 4, rag, 0, 3, 1) -__SYCL_ACCESS(N == 4, rab, 0, 3, 2) -__SYCL_ACCESS(N == 4, raa, 0, 3, 3) -__SYCL_ACCESS(N == 4, grr, 1, 0, 0) -__SYCL_ACCESS(N == 4, grg, 1, 0, 1) -__SYCL_ACCESS(N == 4, grb, 1, 0, 2) -__SYCL_ACCESS(N == 4, gra, 1, 0, 3) -__SYCL_ACCESS(N == 4, ggr, 1, 1, 0) -__SYCL_ACCESS(N == 4, ggg, 1, 1, 1) -__SYCL_ACCESS(N == 4, ggb, 1, 1, 2) -__SYCL_ACCESS(N == 4, gga, 1, 1, 3) -__SYCL_ACCESS(N == 4, gbr, 1, 2, 0) -__SYCL_ACCESS(N == 4, gbg, 1, 2, 1) -__SYCL_ACCESS(N == 4, gbb, 1, 2, 2) -__SYCL_ACCESS(N == 4, gba, 1, 2, 3) -__SYCL_ACCESS(N == 4, gar, 1, 3, 0) -__SYCL_ACCESS(N == 4, gag, 1, 3, 1) -__SYCL_ACCESS(N == 4, gab, 1, 3, 2) -__SYCL_ACCESS(N == 4, gaa, 1, 3, 3) -__SYCL_ACCESS(N == 4, brr, 2, 0, 0) -__SYCL_ACCESS(N == 4, brg, 2, 0, 1) -__SYCL_ACCESS(N == 4, brb, 2, 0, 2) -__SYCL_ACCESS(N == 4, bra, 2, 0, 3) -__SYCL_ACCESS(N == 4, bgr, 2, 1, 0) -__SYCL_ACCESS(N == 4, bgg, 2, 1, 1) -__SYCL_ACCESS(N == 4, bgb, 2, 1, 2) -__SYCL_ACCESS(N == 4, bga, 2, 1, 3) -__SYCL_ACCESS(N == 4, bbr, 2, 2, 0) -__SYCL_ACCESS(N == 4, bbg, 2, 2, 1) -__SYCL_ACCESS(N == 4, bbb, 2, 2, 2) -__SYCL_ACCESS(N == 4, bba, 2, 2, 3) -__SYCL_ACCESS(N == 4, bar, 2, 3, 0) -__SYCL_ACCESS(N == 4, bag, 2, 3, 1) -__SYCL_ACCESS(N == 4, bab, 2, 3, 2) -__SYCL_ACCESS(N == 4, baa, 2, 3, 3) -__SYCL_ACCESS(N == 4, arr, 3, 0, 0) -__SYCL_ACCESS(N == 4, arg, 3, 0, 1) -__SYCL_ACCESS(N == 4, arb, 3, 0, 2) -__SYCL_ACCESS(N == 4, ara, 3, 0, 3) -__SYCL_ACCESS(N == 4, agr, 3, 1, 0) -__SYCL_ACCESS(N == 4, agg, 3, 1, 1) -__SYCL_ACCESS(N == 4, agb, 3, 1, 2) -__SYCL_ACCESS(N == 4, aga, 3, 1, 3) -__SYCL_ACCESS(N == 4, abr, 3, 2, 0) -__SYCL_ACCESS(N == 4, abg, 3, 2, 1) -__SYCL_ACCESS(N == 4, abb, 3, 2, 2) -__SYCL_ACCESS(N == 4, aba, 3, 2, 3) -__SYCL_ACCESS(N == 4, aar, 3, 3, 0) -__SYCL_ACCESS(N == 4, aag, 3, 3, 1) -__SYCL_ACCESS(N == 4, aab, 3, 3, 2) -__SYCL_ACCESS(N == 4, aaa, 3, 3, 3) -__SYCL_ACCESS(N == 4, rrrr, 0, 0, 0, 0) -__SYCL_ACCESS(N == 4, rrrg, 0, 0, 0, 1) -__SYCL_ACCESS(N == 4, rrrb, 0, 0, 0, 2) -__SYCL_ACCESS(N == 4, rrra, 0, 0, 0, 3) -__SYCL_ACCESS(N == 4, rrgr, 0, 0, 1, 0) -__SYCL_ACCESS(N == 4, rrgg, 0, 0, 1, 1) -__SYCL_ACCESS(N == 4, rrgb, 0, 0, 1, 2) -__SYCL_ACCESS(N == 4, rrga, 0, 0, 1, 3) -__SYCL_ACCESS(N == 4, rrbr, 0, 0, 2, 0) -__SYCL_ACCESS(N == 4, rrbg, 0, 0, 2, 1) -__SYCL_ACCESS(N == 4, rrbb, 0, 0, 2, 2) -__SYCL_ACCESS(N == 4, rrba, 0, 0, 2, 3) -__SYCL_ACCESS(N == 4, rrar, 0, 0, 3, 0) -__SYCL_ACCESS(N == 4, rrag, 0, 0, 3, 1) -__SYCL_ACCESS(N == 4, rrab, 0, 0, 3, 2) -__SYCL_ACCESS(N == 4, rraa, 0, 0, 3, 3) -__SYCL_ACCESS(N == 4, rgrr, 0, 1, 0, 0) -__SYCL_ACCESS(N == 4, rgrg, 0, 1, 0, 1) -__SYCL_ACCESS(N == 4, rgrb, 0, 1, 0, 2) -__SYCL_ACCESS(N == 4, rgra, 0, 1, 0, 3) -__SYCL_ACCESS(N == 4, rggr, 0, 1, 1, 0) -__SYCL_ACCESS(N == 4, rggg, 0, 1, 1, 1) -__SYCL_ACCESS(N == 4, rggb, 0, 1, 1, 2) -__SYCL_ACCESS(N == 4, rgga, 0, 1, 1, 3) -__SYCL_ACCESS(N == 4, rgbr, 0, 1, 2, 0) -__SYCL_ACCESS(N == 4, rgbg, 0, 1, 2, 1) -__SYCL_ACCESS(N == 4, rgbb, 0, 1, 2, 2) -__SYCL_ACCESS(N == 4, rgba, 0, 1, 2, 3) -__SYCL_ACCESS(N == 4, rgar, 0, 1, 3, 0) -__SYCL_ACCESS(N == 4, rgag, 0, 1, 3, 1) -__SYCL_ACCESS(N == 4, rgab, 0, 1, 3, 2) -__SYCL_ACCESS(N == 4, rgaa, 0, 1, 3, 3) -__SYCL_ACCESS(N == 4, rbrr, 0, 2, 0, 0) -__SYCL_ACCESS(N == 4, rbrg, 0, 2, 0, 1) -__SYCL_ACCESS(N == 4, rbrb, 0, 2, 0, 2) -__SYCL_ACCESS(N == 4, rbra, 0, 2, 0, 3) -__SYCL_ACCESS(N == 4, rbgr, 0, 2, 1, 0) -__SYCL_ACCESS(N == 4, rbgg, 0, 2, 1, 1) -__SYCL_ACCESS(N == 4, rbgb, 0, 2, 1, 2) -__SYCL_ACCESS(N == 4, rbga, 0, 2, 1, 3) -__SYCL_ACCESS(N == 4, rbbr, 0, 2, 2, 0) -__SYCL_ACCESS(N == 4, rbbg, 0, 2, 2, 1) -__SYCL_ACCESS(N == 4, rbbb, 0, 2, 2, 2) -__SYCL_ACCESS(N == 4, rbba, 0, 2, 2, 3) -__SYCL_ACCESS(N == 4, rbar, 0, 2, 3, 0) -__SYCL_ACCESS(N == 4, rbag, 0, 2, 3, 1) -__SYCL_ACCESS(N == 4, rbab, 0, 2, 3, 2) -__SYCL_ACCESS(N == 4, rbaa, 0, 2, 3, 3) -__SYCL_ACCESS(N == 4, rarr, 0, 3, 0, 0) -__SYCL_ACCESS(N == 4, rarg, 0, 3, 0, 1) -__SYCL_ACCESS(N == 4, rarb, 0, 3, 0, 2) -__SYCL_ACCESS(N == 4, rara, 0, 3, 0, 3) -__SYCL_ACCESS(N == 4, ragr, 0, 3, 1, 0) -__SYCL_ACCESS(N == 4, ragg, 0, 3, 1, 1) -__SYCL_ACCESS(N == 4, ragb, 0, 3, 1, 2) -__SYCL_ACCESS(N == 4, raga, 0, 3, 1, 3) -__SYCL_ACCESS(N == 4, rabr, 0, 3, 2, 0) -__SYCL_ACCESS(N == 4, rabg, 0, 3, 2, 1) -__SYCL_ACCESS(N == 4, rabb, 0, 3, 2, 2) -__SYCL_ACCESS(N == 4, raba, 0, 3, 2, 3) -__SYCL_ACCESS(N == 4, raar, 0, 3, 3, 0) -__SYCL_ACCESS(N == 4, raag, 0, 3, 3, 1) -__SYCL_ACCESS(N == 4, raab, 0, 3, 3, 2) -__SYCL_ACCESS(N == 4, raaa, 0, 3, 3, 3) -__SYCL_ACCESS(N == 4, grrr, 1, 0, 0, 0) -__SYCL_ACCESS(N == 4, grrg, 1, 0, 0, 1) -__SYCL_ACCESS(N == 4, grrb, 1, 0, 0, 2) -__SYCL_ACCESS(N == 4, grra, 1, 0, 0, 3) -__SYCL_ACCESS(N == 4, grgr, 1, 0, 1, 0) -__SYCL_ACCESS(N == 4, grgg, 1, 0, 1, 1) -__SYCL_ACCESS(N == 4, grgb, 1, 0, 1, 2) -__SYCL_ACCESS(N == 4, grga, 1, 0, 1, 3) -__SYCL_ACCESS(N == 4, grbr, 1, 0, 2, 0) -__SYCL_ACCESS(N == 4, grbg, 1, 0, 2, 1) -__SYCL_ACCESS(N == 4, grbb, 1, 0, 2, 2) -__SYCL_ACCESS(N == 4, grba, 1, 0, 2, 3) -__SYCL_ACCESS(N == 4, grar, 1, 0, 3, 0) -__SYCL_ACCESS(N == 4, grag, 1, 0, 3, 1) -__SYCL_ACCESS(N == 4, grab, 1, 0, 3, 2) -__SYCL_ACCESS(N == 4, graa, 1, 0, 3, 3) -__SYCL_ACCESS(N == 4, ggrr, 1, 1, 0, 0) -__SYCL_ACCESS(N == 4, ggrg, 1, 1, 0, 1) -__SYCL_ACCESS(N == 4, ggrb, 1, 1, 0, 2) -__SYCL_ACCESS(N == 4, ggra, 1, 1, 0, 3) -__SYCL_ACCESS(N == 4, gggr, 1, 1, 1, 0) -__SYCL_ACCESS(N == 4, gggg, 1, 1, 1, 1) -__SYCL_ACCESS(N == 4, gggb, 1, 1, 1, 2) -__SYCL_ACCESS(N == 4, ggga, 1, 1, 1, 3) -__SYCL_ACCESS(N == 4, ggbr, 1, 1, 2, 0) -__SYCL_ACCESS(N == 4, ggbg, 1, 1, 2, 1) -__SYCL_ACCESS(N == 4, ggbb, 1, 1, 2, 2) -__SYCL_ACCESS(N == 4, ggba, 1, 1, 2, 3) -__SYCL_ACCESS(N == 4, ggar, 1, 1, 3, 0) -__SYCL_ACCESS(N == 4, ggag, 1, 1, 3, 1) -__SYCL_ACCESS(N == 4, ggab, 1, 1, 3, 2) -__SYCL_ACCESS(N == 4, ggaa, 1, 1, 3, 3) -__SYCL_ACCESS(N == 4, gbrr, 1, 2, 0, 0) -__SYCL_ACCESS(N == 4, gbrg, 1, 2, 0, 1) -__SYCL_ACCESS(N == 4, gbrb, 1, 2, 0, 2) -__SYCL_ACCESS(N == 4, gbra, 1, 2, 0, 3) -__SYCL_ACCESS(N == 4, gbgr, 1, 2, 1, 0) -__SYCL_ACCESS(N == 4, gbgg, 1, 2, 1, 1) -__SYCL_ACCESS(N == 4, gbgb, 1, 2, 1, 2) -__SYCL_ACCESS(N == 4, gbga, 1, 2, 1, 3) -__SYCL_ACCESS(N == 4, gbbr, 1, 2, 2, 0) -__SYCL_ACCESS(N == 4, gbbg, 1, 2, 2, 1) -__SYCL_ACCESS(N == 4, gbbb, 1, 2, 2, 2) -__SYCL_ACCESS(N == 4, gbba, 1, 2, 2, 3) -__SYCL_ACCESS(N == 4, gbar, 1, 2, 3, 0) -__SYCL_ACCESS(N == 4, gbag, 1, 2, 3, 1) -__SYCL_ACCESS(N == 4, gbab, 1, 2, 3, 2) -__SYCL_ACCESS(N == 4, gbaa, 1, 2, 3, 3) -__SYCL_ACCESS(N == 4, garr, 1, 3, 0, 0) -__SYCL_ACCESS(N == 4, garg, 1, 3, 0, 1) -__SYCL_ACCESS(N == 4, garb, 1, 3, 0, 2) -__SYCL_ACCESS(N == 4, gara, 1, 3, 0, 3) -__SYCL_ACCESS(N == 4, gagr, 1, 3, 1, 0) -__SYCL_ACCESS(N == 4, gagg, 1, 3, 1, 1) -__SYCL_ACCESS(N == 4, gagb, 1, 3, 1, 2) -__SYCL_ACCESS(N == 4, gaga, 1, 3, 1, 3) -__SYCL_ACCESS(N == 4, gabr, 1, 3, 2, 0) -__SYCL_ACCESS(N == 4, gabg, 1, 3, 2, 1) -__SYCL_ACCESS(N == 4, gabb, 1, 3, 2, 2) -__SYCL_ACCESS(N == 4, gaba, 1, 3, 2, 3) -__SYCL_ACCESS(N == 4, gaar, 1, 3, 3, 0) -__SYCL_ACCESS(N == 4, gaag, 1, 3, 3, 1) -__SYCL_ACCESS(N == 4, gaab, 1, 3, 3, 2) -__SYCL_ACCESS(N == 4, gaaa, 1, 3, 3, 3) -__SYCL_ACCESS(N == 4, brrr, 2, 0, 0, 0) -__SYCL_ACCESS(N == 4, brrg, 2, 0, 0, 1) -__SYCL_ACCESS(N == 4, brrb, 2, 0, 0, 2) -__SYCL_ACCESS(N == 4, brra, 2, 0, 0, 3) -__SYCL_ACCESS(N == 4, brgr, 2, 0, 1, 0) -__SYCL_ACCESS(N == 4, brgg, 2, 0, 1, 1) -__SYCL_ACCESS(N == 4, brgb, 2, 0, 1, 2) -__SYCL_ACCESS(N == 4, brga, 2, 0, 1, 3) -__SYCL_ACCESS(N == 4, brbr, 2, 0, 2, 0) -__SYCL_ACCESS(N == 4, brbg, 2, 0, 2, 1) -__SYCL_ACCESS(N == 4, brbb, 2, 0, 2, 2) -__SYCL_ACCESS(N == 4, brba, 2, 0, 2, 3) -__SYCL_ACCESS(N == 4, brar, 2, 0, 3, 0) -__SYCL_ACCESS(N == 4, brag, 2, 0, 3, 1) -__SYCL_ACCESS(N == 4, brab, 2, 0, 3, 2) -__SYCL_ACCESS(N == 4, braa, 2, 0, 3, 3) -__SYCL_ACCESS(N == 4, bgrr, 2, 1, 0, 0) -__SYCL_ACCESS(N == 4, bgrg, 2, 1, 0, 1) -__SYCL_ACCESS(N == 4, bgrb, 2, 1, 0, 2) -__SYCL_ACCESS(N == 4, bgra, 2, 1, 0, 3) -__SYCL_ACCESS(N == 4, bggr, 2, 1, 1, 0) -__SYCL_ACCESS(N == 4, bggg, 2, 1, 1, 1) -__SYCL_ACCESS(N == 4, bggb, 2, 1, 1, 2) -__SYCL_ACCESS(N == 4, bgga, 2, 1, 1, 3) -__SYCL_ACCESS(N == 4, bgbr, 2, 1, 2, 0) -__SYCL_ACCESS(N == 4, bgbg, 2, 1, 2, 1) -__SYCL_ACCESS(N == 4, bgbb, 2, 1, 2, 2) -__SYCL_ACCESS(N == 4, bgba, 2, 1, 2, 3) -__SYCL_ACCESS(N == 4, bgar, 2, 1, 3, 0) -__SYCL_ACCESS(N == 4, bgag, 2, 1, 3, 1) -__SYCL_ACCESS(N == 4, bgab, 2, 1, 3, 2) -__SYCL_ACCESS(N == 4, bgaa, 2, 1, 3, 3) -__SYCL_ACCESS(N == 4, bbrr, 2, 2, 0, 0) -__SYCL_ACCESS(N == 4, bbrg, 2, 2, 0, 1) -__SYCL_ACCESS(N == 4, bbrb, 2, 2, 0, 2) -__SYCL_ACCESS(N == 4, bbra, 2, 2, 0, 3) -__SYCL_ACCESS(N == 4, bbgr, 2, 2, 1, 0) -__SYCL_ACCESS(N == 4, bbgg, 2, 2, 1, 1) -__SYCL_ACCESS(N == 4, bbgb, 2, 2, 1, 2) -__SYCL_ACCESS(N == 4, bbga, 2, 2, 1, 3) -__SYCL_ACCESS(N == 4, bbbr, 2, 2, 2, 0) -__SYCL_ACCESS(N == 4, bbbg, 2, 2, 2, 1) -__SYCL_ACCESS(N == 4, bbbb, 2, 2, 2, 2) -__SYCL_ACCESS(N == 4, bbba, 2, 2, 2, 3) -__SYCL_ACCESS(N == 4, bbar, 2, 2, 3, 0) -__SYCL_ACCESS(N == 4, bbag, 2, 2, 3, 1) -__SYCL_ACCESS(N == 4, bbab, 2, 2, 3, 2) -__SYCL_ACCESS(N == 4, bbaa, 2, 2, 3, 3) -__SYCL_ACCESS(N == 4, barr, 2, 3, 0, 0) -__SYCL_ACCESS(N == 4, barg, 2, 3, 0, 1) -__SYCL_ACCESS(N == 4, barb, 2, 3, 0, 2) -__SYCL_ACCESS(N == 4, bara, 2, 3, 0, 3) -__SYCL_ACCESS(N == 4, bagr, 2, 3, 1, 0) -__SYCL_ACCESS(N == 4, bagg, 2, 3, 1, 1) -__SYCL_ACCESS(N == 4, bagb, 2, 3, 1, 2) -__SYCL_ACCESS(N == 4, baga, 2, 3, 1, 3) -__SYCL_ACCESS(N == 4, babr, 2, 3, 2, 0) -__SYCL_ACCESS(N == 4, babg, 2, 3, 2, 1) -__SYCL_ACCESS(N == 4, babb, 2, 3, 2, 2) -__SYCL_ACCESS(N == 4, baba, 2, 3, 2, 3) -__SYCL_ACCESS(N == 4, baar, 2, 3, 3, 0) -__SYCL_ACCESS(N == 4, baag, 2, 3, 3, 1) -__SYCL_ACCESS(N == 4, baab, 2, 3, 3, 2) -__SYCL_ACCESS(N == 4, baaa, 2, 3, 3, 3) -__SYCL_ACCESS(N == 4, arrr, 3, 0, 0, 0) -__SYCL_ACCESS(N == 4, arrg, 3, 0, 0, 1) -__SYCL_ACCESS(N == 4, arrb, 3, 0, 0, 2) -__SYCL_ACCESS(N == 4, arra, 3, 0, 0, 3) -__SYCL_ACCESS(N == 4, argr, 3, 0, 1, 0) -__SYCL_ACCESS(N == 4, argg, 3, 0, 1, 1) -__SYCL_ACCESS(N == 4, argb, 3, 0, 1, 2) -__SYCL_ACCESS(N == 4, arga, 3, 0, 1, 3) -__SYCL_ACCESS(N == 4, arbr, 3, 0, 2, 0) -__SYCL_ACCESS(N == 4, arbg, 3, 0, 2, 1) -__SYCL_ACCESS(N == 4, arbb, 3, 0, 2, 2) -__SYCL_ACCESS(N == 4, arba, 3, 0, 2, 3) -__SYCL_ACCESS(N == 4, arar, 3, 0, 3, 0) -__SYCL_ACCESS(N == 4, arag, 3, 0, 3, 1) -__SYCL_ACCESS(N == 4, arab, 3, 0, 3, 2) -__SYCL_ACCESS(N == 4, araa, 3, 0, 3, 3) -__SYCL_ACCESS(N == 4, agrr, 3, 1, 0, 0) -__SYCL_ACCESS(N == 4, agrg, 3, 1, 0, 1) -__SYCL_ACCESS(N == 4, agrb, 3, 1, 0, 2) -__SYCL_ACCESS(N == 4, agra, 3, 1, 0, 3) -__SYCL_ACCESS(N == 4, aggr, 3, 1, 1, 0) -__SYCL_ACCESS(N == 4, aggg, 3, 1, 1, 1) -__SYCL_ACCESS(N == 4, aggb, 3, 1, 1, 2) -__SYCL_ACCESS(N == 4, agga, 3, 1, 1, 3) -__SYCL_ACCESS(N == 4, agbr, 3, 1, 2, 0) -__SYCL_ACCESS(N == 4, agbg, 3, 1, 2, 1) -__SYCL_ACCESS(N == 4, agbb, 3, 1, 2, 2) -__SYCL_ACCESS(N == 4, agba, 3, 1, 2, 3) -__SYCL_ACCESS(N == 4, agar, 3, 1, 3, 0) -__SYCL_ACCESS(N == 4, agag, 3, 1, 3, 1) -__SYCL_ACCESS(N == 4, agab, 3, 1, 3, 2) -__SYCL_ACCESS(N == 4, agaa, 3, 1, 3, 3) -__SYCL_ACCESS(N == 4, abrr, 3, 2, 0, 0) -__SYCL_ACCESS(N == 4, abrg, 3, 2, 0, 1) -__SYCL_ACCESS(N == 4, abrb, 3, 2, 0, 2) -__SYCL_ACCESS(N == 4, abra, 3, 2, 0, 3) -__SYCL_ACCESS(N == 4, abgr, 3, 2, 1, 0) -__SYCL_ACCESS(N == 4, abgg, 3, 2, 1, 1) -__SYCL_ACCESS(N == 4, abgb, 3, 2, 1, 2) -__SYCL_ACCESS(N == 4, abga, 3, 2, 1, 3) -__SYCL_ACCESS(N == 4, abbr, 3, 2, 2, 0) -__SYCL_ACCESS(N == 4, abbg, 3, 2, 2, 1) -__SYCL_ACCESS(N == 4, abbb, 3, 2, 2, 2) -__SYCL_ACCESS(N == 4, abba, 3, 2, 2, 3) -__SYCL_ACCESS(N == 4, abar, 3, 2, 3, 0) -__SYCL_ACCESS(N == 4, abag, 3, 2, 3, 1) -__SYCL_ACCESS(N == 4, abab, 3, 2, 3, 2) -__SYCL_ACCESS(N == 4, abaa, 3, 2, 3, 3) -__SYCL_ACCESS(N == 4, aarr, 3, 3, 0, 0) -__SYCL_ACCESS(N == 4, aarg, 3, 3, 0, 1) -__SYCL_ACCESS(N == 4, aarb, 3, 3, 0, 2) -__SYCL_ACCESS(N == 4, aara, 3, 3, 0, 3) -__SYCL_ACCESS(N == 4, aagr, 3, 3, 1, 0) -__SYCL_ACCESS(N == 4, aagg, 3, 3, 1, 1) -__SYCL_ACCESS(N == 4, aagb, 3, 3, 1, 2) -__SYCL_ACCESS(N == 4, aaga, 3, 3, 1, 3) -__SYCL_ACCESS(N == 4, aabr, 3, 3, 2, 0) -__SYCL_ACCESS(N == 4, aabg, 3, 3, 2, 1) -__SYCL_ACCESS(N == 4, aabb, 3, 3, 2, 2) -__SYCL_ACCESS(N == 4, aaba, 3, 3, 2, 3) -__SYCL_ACCESS(N == 4, aaar, 3, 3, 3, 0) -__SYCL_ACCESS(N == 4, aaag, 3, 3, 3, 1) -__SYCL_ACCESS(N == 4, aaab, 3, 3, 3, 2) -__SYCL_ACCESS(N == 4, aaaa, 3, 3, 3, 3) - -#endif // #ifdef SYCL_SIMPLE_SWIZZLES - -//__swizzled_vec__ lo()/hi() const; -__SYCL_ACCESS(N == 2, lo, 0) -__SYCL_ACCESS(N == 3, lo, 0, 1) -__SYCL_ACCESS(N == 4, lo, 0, 1) -__SYCL_ACCESS(N == 8, lo, 0, 1, 2, 3) -__SYCL_ACCESS(N == 16, lo, 0, 1, 2, 3, 4, 5, 6, 7) -__SYCL_ACCESS(N == 2, hi, 1) -__SYCL_ACCESS(N == 3, hi, 2, 3) -__SYCL_ACCESS(N == 4, hi, 2, 3) -__SYCL_ACCESS(N == 8, hi, 4, 5, 6, 7) -__SYCL_ACCESS(N == 16, hi, 8, 9, 10, 11, 12, 13, 14, 15) -//__swizzled_vec__ odd()/even() const; -__SYCL_ACCESS(N == 2, odd, 1) -__SYCL_ACCESS(N == 3, odd, 1, 3) -__SYCL_ACCESS(N == 4, odd, 1, 3) -__SYCL_ACCESS(N == 8, odd, 1, 3, 5, 7) -__SYCL_ACCESS(N == 16, odd, 1, 3, 5, 7, 9, 11, 13, 15) -__SYCL_ACCESS(N == 2, even, 0) -__SYCL_ACCESS(N == 3, even, 0, 2) -__SYCL_ACCESS(N == 4, even, 0, 2) -__SYCL_ACCESS(N == 8, even, 0, 2, 4, 6) -__SYCL_ACCESS(N == 16, even, 0, 2, 4, 6, 8, 10, 12, 14) - -#undef __SYCL_E0 -#undef __SYCL_E1 -#undef __SYCL_E2 -#undef __SYCL_E3 -#undef __SYCL_E4 -#undef __SYCL_E5 -#undef __SYCL_E6 -#undef __SYCL_E7 -#undef __SYCL_E8 - -#undef __SYCL_NTH_ARG -#undef __SYCL_EXPAND -#undef __SYCL_INDEXER -#undef __SYCL_ACCESS -#undef __SYCL_SCALAR_ACCESS diff --git a/sycl/include/sycl/types.hpp b/sycl/include/sycl/types.hpp index 87698c835a3ee..53ed36d83eaad 100644 --- a/sycl/include/sycl/types.hpp +++ b/sycl/include/sycl/types.hpp @@ -8,22 +8,21 @@ #pragma once -#include // for decorated, address_space -#include // for half, cl_char, cl_int -#include // for ArrayCreator, RepeatV... -#include // for __SYCL2020_DEPRECATED -#include // for vector_basic_list -#include // for is_sigeninteger, is_s... +#include +#include +#include +#include +#include +#include #include -#include // for is_contained -#include // for is_floating_point -#include // for make_error_code, errc -#include // for StorageT, half, Vec16... -#include // for __SYCL_BINOP, __SYCL_... -#include // for multi_ptr +#include +#include +#include +#include +#include +#include #include - #include -#include // bfloat16 +#include diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 6a0e10fe01a6d..5c2d39acaee5d 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -26,38 +26,405 @@ #error "SYCL device compiler is built without ext_vector_type support" #endif -#include // for decorated, address_space -#include // for half, cl_char, cl_int -#include // for ArrayCreator, RepeatV... -#include // for __SYCL2020_DEPRECATED -#include // for vector_basic_list -#include // for is_sigeninteger, is_s... -#include // for memcpy -#include // for is_contained -#include // for is_floating_point -#include -#include // for StorageT, half, Vec16... - -#include // bfloat16 - -#include // for std::min -#include // for array -#include // for assert -#include // for size_t, NULL, byte -#include // for uint8_t, int16_t, int... -#include // for divides, multiplies -#include // for pair -#include // for operator<<, basic_ost... -#include // for enable_if_t, is_same -#include // for index_sequence, make_... +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __SYCL_VEC_STANDALONE +#define __SYCL_EBO +#define __SYCL2020_DEPRECATED(...) namespace sycl { +inline namespace _V1 { +namespace access { +enum class address_space { global_space }; +enum class decorated { yes, no }; +enum class placeholder; +enum class mode; +enum class target; +} // namespace access + +namespace detail { +template struct TargetToAS { + constexpr static access::address_space AS = + access::address_space::global_space; +}; +} // namespace detail + +template +class accessor; + +template +class multi_ptr; + +template +constexpr std::enable_if_t::value && + std::is_trivially_copyable::value, + To> +bit_cast(const From &from) noexcept { + return __builtin_bit_cast(To, from); +} +template class __SYCL_EBO vec; + +namespace detail::half_impl { +class half; +#ifdef __SYCL_DEVICE_ONLY__ +using StorageT = _Float16; +using BIsRepresentationT = _Float16; +using VecElemT = _Float16; +#else // SYCL_DEVICE_ONLY +using StorageT = uint16_t; +// No need to extract underlying data type for built-in functions operating on +// host +using BIsRepresentationT = half; +using VecElemT = half; +#endif // SYCL_DEVICE_ONLY +} // namespace detail::half_impl +using half = detail::half_impl::half; + +namespace ext::oneapi { +class bfloat16; +namespace detail { +using Bfloat16StorageT = uint16_t; +} +} // namespace ext::oneapi + +namespace detail { +template +class __SYCL_EBO Swizzle; + +template struct is_vec : std::false_type {}; +template struct is_vec> : std::true_type {}; + +template constexpr bool is_vec_v = is_vec::value; + +template struct is_swizzle : std::false_type {}; +template +struct is_swizzle> + : std::true_type {}; + +template constexpr bool is_swizzle_v = is_swizzle::value; + +template +constexpr bool is_vec_or_swizzle_v = is_vec_v || is_swizzle_v; + +template +struct is_ext_vector : std::false_type {}; + +template +struct is_ext_vector< + T, std::void_t()))>> + : std::true_type {}; + +template +inline constexpr bool is_ext_vector_v = is_ext_vector::value; + +template struct get_elem_type { + using type = T; +}; +template +struct get_elem_type>> { + using type = typename T::element_type; +}; +template using get_elem_type_t = typename get_elem_type::type; + +template +using select_cl_scalar_integral_signed_t = std::conditional_t< + sizeof(T) == 1, int8_t, + std::conditional_t>>; + +template +using select_cl_scalar_integral_unsigned_t = std::conditional_t< + sizeof(T) == 1, uint8_t, + std::conditional_t>>; + +// Example usage: +// using mapped = map_type*/ to0, +// from1, /*->*/ to1, +// ...> +template struct map_type { + using type = void; +}; + +template +struct map_type { + using type = std::conditional_t, To, + typename map_type::type>; +}; + +template auto convertToOpenCLType(T &&x) { + using no_ref = std::remove_reference_t; + if constexpr (is_vec_v) { + using ElemTy = typename no_ref::element_type; + // sycl::half may convert to _Float16, and we would try to instantiate + // vec class with _Float16 DataType, which is not expected there. As + // such, leave vector as-is. + using MatchingVec = + vec, ElemTy, + decltype(convertToOpenCLType( + std::declval()))>, + no_ref::size()>; +#ifdef __SYCL_DEVICE_ONLY__ + return sycl::bit_cast(x); +#else + return x.template as(); +#endif + } else if constexpr (is_vec_v) { + using ElemTy = typename no_ref::element_type; + // sycl::half may convert to _Float16, and we would try to instantiate + // vec class with _Float16 DataType, which is not expected there. As + // such, leave vector as-is. + using MatchingVec = + vec, ElemTy, + decltype(convertToOpenCLType( + std::declval()))>, + no_ref::size()>; +#ifdef __SYCL_DEVICE_ONLY__ + return sycl::bit_cast(x); +#else + return x.template as(); +#endif +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + } else if constexpr (std::is_same_v) { + return static_cast(x); +#endif + } else if constexpr (std::is_integral_v) { + using OpenCLType = + std::conditional_t, + select_cl_scalar_integral_signed_t, + select_cl_scalar_integral_unsigned_t>; + static_assert(sizeof(OpenCLType) == sizeof(T)); + return static_cast(x); + } else if constexpr (std::is_same_v) { + // Make it a dependent type. + using OpenCLType = + std::conditional_t; + static_assert(std::is_same_v); + static_assert(sizeof(OpenCLType) == sizeof(T)); + return static_cast(x); + } else if constexpr (std::is_same_v) { + // On host, don't interpret BF16 as uint16. +#ifdef __SYCL_DEVICE_ONLY__ + using OpenCLType = sycl::ext::oneapi::detail::Bfloat16StorageT; + return sycl::bit_cast(x); +#else + return std::forward(x); +#endif + } else if constexpr (std::is_floating_point_v) { + static_assert(std::is_same_v || + std::is_same_v, + "Other FP types are not expected/supported (yet?)"); + return std::forward(x); + } else { + static_assert(std::is_same_v, "Something is wrong"); + return std::forward(x); + } +} + +template +using ConvertToOpenCLType_t = decltype(convertToOpenCLType(std::declval())); + +template auto convertFromOpenCLTypeFor(From &&x) { + if constexpr (std::is_same_v && + std::is_same_v, bool>) { + // FIXME: Something seems to be wrong elsewhere... + return x; + } else { + using OpenCLType = decltype(convertToOpenCLType(std::declval())); + static_assert(std::is_same_v, OpenCLType>); + static_assert(sizeof(OpenCLType) == sizeof(To)); + using To_noref = std::remove_reference_t; + using From_noref = std::remove_reference_t; + if constexpr (is_vec_v && is_vec_v) + return x.template as(); + else if constexpr (is_vec_v && is_ext_vector_v) + return To_noref{bit_cast(x)}; + else + return static_cast(x); + } +} + +// Helper function for concatenating two std::array. +template +constexpr std::array +ConcatArrays(const std::array &A1, + const std::array &A2, + std::index_sequence, std::index_sequence) { + return {A1[Is1]..., A2[Is2]...}; +} +template +constexpr std::array ConcatArrays(const std::array &A1, + const std::array &A2) { + return ConcatArrays(A1, A2, std::make_index_sequence(), + std::make_index_sequence()); +} + +// Utility for creating an std::array from the results of flattening the +// arguments using a flattening functor. +template typename FlattenF, + typename... ArgTN> +struct ArrayCreator; +template typename FlattenF, + typename ArgT, typename... ArgTN> +struct ArrayCreator { + static constexpr auto Create(const ArgT &Arg, const ArgTN &...Args) { + auto ImmArray = FlattenF()(Arg); + // Due to a bug in MSVC narrowing size_t to a bool in an if constexpr causes + // warnings. To avoid this we add the comparison to 0. + if constexpr (sizeof...(Args) > 0) + return ConcatArrays( + ImmArray, ArrayCreator::Create(Args...)); + else + return ImmArray; + } +}; +template typename FlattenF> +struct ArrayCreator { + static constexpr auto Create() { return std::array{}; } +}; + +// Helper function for creating an arbitrary sized array with the same value +// repeating. +template +static constexpr std::array +RepeatValueHelper(const T &Arg, std::index_sequence) { + auto ReturnArg = [&](size_t) { return Arg; }; + return {ReturnArg(Is)...}; +} +template +static constexpr std::array RepeatValue(const T &Arg) { + return RepeatValueHelper(Arg, std::make_index_sequence()); +} + +#define __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES \ + /* __swizzled_vec__ XYZW_ACCESS() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N <= 4, x, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2 || N == 3 || N == 4, y, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, z, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, w, 3) \ + \ + /* __swizzled_vec__ RGBA_ACCESS() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, r, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, g, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, b, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, a, 3) \ + \ + /* __swizzled_vec__ INDEX_ACCESS() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N > 0, s0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N > 1, s1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N > 2, s2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N > 2, s3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N > 4, s4, 4) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N > 4, s5, 5) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N > 4, s6, 6) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N > 4, s7, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, s8, 8) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, s9, 9) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, sA, 10) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, sB, 11) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, sC, 12) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, sD, 13) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, sE, 14) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, sF, 15) \ + \ + /* __swizzled_vec__ lo()/hi() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, lo, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, lo, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, lo, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, lo, 0, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, lo, 0, 1, 2, 3, 4, 5, 6, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, hi, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, hi, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, hi, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, hi, 4, 5, 6, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, hi, 8, 9, 10, 11, 12, 13, 14, 15) \ + /* __swizzled_vec__ odd()/even() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, odd, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, odd, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, odd, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, odd, 1, 3, 5, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, odd, 1, 3, 5, 7, 9, 11, 13, 15) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, even, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, even, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, even, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, even, 0, 2, 4, 6) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, even, 0, 2, 4, 6, 8, 10, 12, 14) \ + /* Omitted SYCL_SIMPLE_SWIZZLES */ + +#define __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST(COND, NAME, ...) \ + template \ + std::enable_if_t< \ + (COND), decltype(std::declval().template swizzle<__VA_ARGS__>())> \ + NAME() { \ + return static_cast(this)->template swizzle<__VA_ARGS__>(); \ + } + +#define __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, ...) \ + template \ + std::enable_if_t<(COND), decltype(std::declval() \ + .template swizzle<__VA_ARGS__>())> \ + NAME() const { \ + return static_cast(this)->template swizzle<__VA_ARGS__>(); \ + } + +template struct NamedSwizzlesMixinConst { +#define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ + __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) + + __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES + +#undef __SYCL_SWIZZLE_MIXIN_METHOD +}; + +template struct NamedSwizzlesMixinBoth { +#define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ + __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST(COND, NAME, __VA_ARGS__) \ + __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) + + __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES +#undef __SYCL_SWIZZLE_MIXIN_METHOD +}; + +#undef __SYCL_SWIZZLE_MIXIN_METHOD_CONST +#undef __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST + +} // namespace detail +} // namespace _V1 +} // namespace sycl + +#else +#include +#include +#include +#include +#include +#include +#include + +#include +#endif + +namespace sycl { // TODO: Fix in the next ABI breaking windows. enum class rounding_mode { automatic = 0, rte = 1, rtz = 2, rtp = 3, rtn = 4 }; inline namespace _V1 { - struct elem { static constexpr int x = 0; static constexpr int y = 1; @@ -85,20 +452,136 @@ struct elem { static constexpr int sF = 15; }; +template class __SYCL_EBO vec; + namespace detail { -template class OperationCurrentT, int... Indexes> -class SwizzleOp; +template +class __SYCL_EBO Swizzle; + +template struct is_assignable_swizzle; + +template +struct is_assignable_swizzle> { + static constexpr bool value = !IsConstVec && []() constexpr { + int Idxs[] = {Indexes...}; + for (std::size_t i = 1; i < sizeof...(Indexes); ++i) { + for (std::size_t j = 0; j < i; ++j) + if (Idxs[j] == Idxs[i]) + // Repeating index + return false; + } -// Special type indicating that SwizzleOp should just read value from vector - -// not trying to perform any operations. Should not be called. -template class GetOp { -public: - using DataT = T; - DataT getValue(size_t) const { return (DataT)0; } - DataT operator()(DataT, DataT) { return (DataT)0; } + return true; + }(); +}; + +template +constexpr bool is_assignable_swizzle_v = is_assignable_swizzle::value; + +// We need that trait when the type is still incomplete (inside mixin), so +// cannot deduce the property through the swizzle's `operator[]`. +template struct is_over_const_vec_impl; + +template +struct is_over_const_vec_impl> + : std::bool_constant {}; + +template +inline constexpr bool is_over_const_vec = + is_over_const_vec_impl::value; + +#ifdef __SYCL_DEVICE_ONLY__ +template +using element_type_for_vector_t = typename detail::map_type< + DataT, +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + std::byte, /*->*/ std::uint8_t, // +#endif + bool, /*->*/ std::uint8_t, // + sycl::half, /*->*/ sycl::detail::half_impl::StorageT, // + sycl::ext::oneapi::bfloat16, + /*->*/ sycl::ext::oneapi::detail::Bfloat16StorageT, // + char, /*->*/ detail::ConvertToOpenCLType_t, // + DataT, /*->*/ DataT // + >::type; + +// Type used for passing sycl::vec to SPIRV builtins. +// We can not use ext_vector_type(1) as it's not supported by SPIRV +// plugins (CTS fails). +template +using vector_t = + typename std::conditional_t, + element_type_for_vector_t __attribute__(( + ext_vector_type(NumElements)))>; +#endif // __SYCL_DEVICE_ONLY__ + +// Provide a class that can deduce element_type/size() from an incomplete type +// to be used in mixins like: +// +// template +// struct AMixin : private from_incomplete { +// /* `typename` is required with gcc and not clang /* +// ... typename AMixin::element_type/AMixin::size() ... +// }; +// +// or via type alias +// +// template +// class AMixin { +// using element_type = typename from_incomplete::element_type; +// ... +// }; +// +// NOTE: `AMixin` CANNOT use `DataT` as type alias because MSVC is buggy without +// `/permissive:-`, see https://godbolt.org/z/bMdn3hWds +// +// +// We'd like actual vec/swizle to `public`-inherit from this to avoid code +// duplication as well, but it's impossible due to `-Winaccessible-base` +// warning: +// +// > direct base 'from_incomplete>' is inaccessible due to +// > ambiguity. +// +// I personally think it's meaningless, because this helper is eligible for +// Empty Bases Optimization meaning its size as a sub-object is zero and no +// members of it will ever be accessed (and `element_type`/`size()` don't result +// in an ill-formed code, meaning no errors are emitted for them). +template struct from_incomplete; +template +struct from_incomplete : public from_incomplete {}; + +template +struct from_incomplete> { + using element_type = DataT; + static constexpr size_t size() { return NumElements; } + +#ifdef __SYCL_DEVICE_ONLY__ + using vector_t = vector_t; +#endif }; +template +struct from_incomplete> + : public from_incomplete> {}; + +template +struct is_explicitly_convertible_to_impl : std::false_type {}; + +template +struct is_explicitly_convertible_to_impl< + T, U, std::void_t(std::declval()))>> + : std::true_type {}; + +template +struct is_explicitly_convertible_to : is_explicitly_convertible_to_impl { +}; + +template +inline constexpr bool is_explicitly_convertible_to_v = + is_explicitly_convertible_to::value; + // Templated vs. non-templated conversion operator behaves differently when two // conversions are needed as in the case below: // @@ -113,14 +596,825 @@ template class GetOp { // // must go throw `v.x()` returning a swizzle, then its `operator==` returning // vec and we want that code to compile. -template -struct ScalarConversionOperatorMixIn {}; +enum class ConversionOpType { + conv_regular, + conv_explicit, + conv_template, + conv_explicit_template_convert +}; +template +struct ConversionOperatorMixin {}; +template +struct ConversionOperatorMixin { + operator To() const { + return static_cast(this)->template convertOperatorImpl(); + } +}; +template +struct ConversionOperatorMixin { + explicit operator To() const { + return static_cast(this)->template convertOperatorImpl(); + } +}; +template +struct ConversionOperatorMixin { + template >> + operator T() const { + return static_cast(this)->template convertOperatorImpl(); + } +}; -template -struct ScalarConversionOperatorMixIn> { - operator T() const { return (*static_cast(this))[0]; } +// Only for vec/swizzle: +template +struct ConversionOperatorMixin< + Self, To, ConversionOpType::conv_explicit_template_convert, true> { + // FIXME: guard against byte and check the other is integral + // TODO: probable remove swizzle/vec from T as well. + template && + !std::is_same_v +#ifdef __SYCL_DEVICE_ONLY__ + && !std::is_same_v> +#endif + >> + explicit operator T() const { + return static_cast(this)->template convertOperatorImpl(); + } }; +// Everything could have been much easier if we had C++20 concepts, then all the +// operators could be provided in a single mixin class with proper `requires` +// clauses on each overload. Until then, we have to have at least a separate +// mixing for each requirement (e.g. not byte, neither byte nor fp, not fp, +// etc.). Grouping like that would also be somewhat confusing, so we just create +// a separate mixin for each overload/narrow set of overloads and just "merge" +// them all back later. + +template +class IncDecMixin {}; + +template +class IncDecMixin< + SelfOperandTy, + std::enable_if_t::element_type>>> { + using element_type = typename from_incomplete::element_type; + +public: + friend SelfOperandTy &operator++(SelfOperandTy &x) { + x += element_type{1}; + return x; + } + friend SelfOperandTy &operator--(SelfOperandTy &x) { + x -= element_type{1}; + return x; + } + friend auto operator++(SelfOperandTy &x, int) { + auto tmp = +x; + x += element_type{1}; + return tmp; + } + friend auto operator--(SelfOperandTy &x, int) { + auto tmp = +x; + x -= element_type{1}; + return tmp; + } +}; + +// TODO: The specification doesn't mention this specifically, but that's what +// the implementation has been doing and it seems to be a reasonable thing to +// do. Otherwise shift operators for byte element type would have to be disabled +// completely to follow C++ standard approach. +template +struct ByteShiftsNonAssignMixin {}; + +template +struct ByteShiftsOpAssignMixin {}; + +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) +template +struct ByteShiftsNonAssignMixin< + Self, std::enable_if_t::element_type>>> { + friend auto operator<<(const Self &lhs, int shift) { + vec::element_type, + from_incomplete::size()> + tmp; + for (int i = 0; i < tmp.size(); ++i) + tmp[i] = lhs[i] << shift; + return tmp; + } + friend auto operator>>(const Self &lhs, int shift) { + vec::element_type, + from_incomplete::size()> + tmp; + for (int i = 0; i < tmp.size(); ++i) + tmp[i] = lhs[i] >> shift; + return tmp; + } +}; + +template +struct ByteShiftsOpAssignMixin< + SelfOperandTy, + std::enable_if_t::element_type>>> { + friend SelfOperandTy &operator<<=(SelfOperandTy &lhs, int shift) { + lhs = lhs << shift; + return lhs; + } + friend SelfOperandTy &operator>>=(SelfOperandTy &lhs, int shift) { + lhs = lhs >> shift; + return lhs; + } +}; +#endif + +// We use std::plus and similar to "map" template parameter to an +// overloaded operator. These three below are missing from ``. +struct ShiftLeft { + template + constexpr auto operator()(T &&lhs, U &&rhs) const + -> decltype(std::forward(lhs) << std::forward(rhs)) { + return std::forward(lhs) << std::forward(rhs); + } +}; +struct ShiftRight { + template + constexpr auto operator()(T &&lhs, + U &&rhs) const -> decltype(std::forward(lhs) >> + std::forward(rhs)) { + return std::forward(lhs) >> std::forward(rhs); + } +}; + +struct UnaryPlus { + template + constexpr auto operator()(T &&arg) const -> decltype(+std::forward(arg)) { + return +std::forward(arg); + } +}; + +template +static constexpr bool not_fp = + !std::is_same_v && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +// Not using `is_byte_v` to avoid unnecessary dependencies on `half`/`bfloat16` +// headers. +template +static constexpr bool not_byte = +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + !std::is_same_v; +#else + true; +#endif + +// To provide information about operators availability depending on vec/swizzle +// element type. +template +inline constexpr bool is_op_available_for_type = false; + +#define __SYCL_OP_AVAILABILITY(OP, COND) \ + template \ + inline constexpr bool is_op_available_for_type = COND; + +// clang-format off +__SYCL_OP_AVAILABILITY(std::plus , not_byte) +__SYCL_OP_AVAILABILITY(std::minus , not_byte) +__SYCL_OP_AVAILABILITY(std::multiplies , not_byte) +__SYCL_OP_AVAILABILITY(std::divides , not_byte) +__SYCL_OP_AVAILABILITY(std::modulus , not_byte && not_fp) + +__SYCL_OP_AVAILABILITY(std::bit_and , not_fp) +__SYCL_OP_AVAILABILITY(std::bit_or , not_fp) +__SYCL_OP_AVAILABILITY(std::bit_xor , not_fp) + +__SYCL_OP_AVAILABILITY(std::equal_to , true) +__SYCL_OP_AVAILABILITY(std::not_equal_to , true) +__SYCL_OP_AVAILABILITY(std::less , not_byte) +__SYCL_OP_AVAILABILITY(std::greater , not_byte) +__SYCL_OP_AVAILABILITY(std::less_equal , not_byte) +__SYCL_OP_AVAILABILITY(std::greater_equal , not_byte) + +__SYCL_OP_AVAILABILITY(std::logical_and , not_byte) +__SYCL_OP_AVAILABILITY(std::logical_or , not_byte) + +__SYCL_OP_AVAILABILITY(ShiftLeft , not_byte && not_fp) +__SYCL_OP_AVAILABILITY(ShiftRight , not_byte && not_fp) + +// Unary +__SYCL_OP_AVAILABILITY(std::negate , not_byte) +__SYCL_OP_AVAILABILITY(std::logical_not , not_byte) +__SYCL_OP_AVAILABILITY(std::bit_not , not_fp) +__SYCL_OP_AVAILABILITY(UnaryPlus , not_byte) +// clang-format on + +#undef __SYCL_OP_AVAILABILITY + +template +inline constexpr bool is_op_available = + (from_incomplete::size() >= 1 && + is_op_available_for_type::element_type>); + +// Vector-specific part of the mixins' implementation. +struct VectorImpl { +private: +#ifdef __SYCL_DEVICE_ONLY__ + static constexpr bool is_host = false; +#else + static constexpr bool is_host = true; +#endif + + template static constexpr int num_elements() { + if constexpr (is_vec_or_swizzle_v) + return T::size(); + else + return 1; + } + +public: + // Binop: + template + auto operator()(const T0 &Lhs, const T1 &Rhs, OpTy &&Op) { + static_assert(std::is_same_v, get_elem_type_t>); + constexpr auto N = (std::max)(num_elements(), num_elements()); + using DataT = get_elem_type_t; + constexpr bool is_logical = + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v>; + auto Get = [](const auto &a, [[maybe_unused]] int idx) { + if constexpr (is_vec_v>>) + return a[idx]; + else + return a; + }; + using ResultVec = + vec, + DataT>, + N>; + if constexpr (is_host || std::is_same_v || + std::is_same_v || N == 1) { + ResultVec tmp{}; + for (int i = 0; i < N; ++i) + if constexpr (is_logical) + tmp[i] = Op(Get(Lhs, i), Get(Rhs, i)) ? -1 : 0; + else + tmp[i] = Op(Get(Lhs, i), Get(Rhs, i)); + return tmp; + } else { + using vec_t = vec; + using vector_t = typename vec_t::vector_t; + if constexpr (is_logical) { + // Workaround a crash in the C++ front end, reported internally. + constexpr bool no_crash = + std::is_same_v> || + std::is_same_v>; + if constexpr (no_crash) { + auto res = Op(static_cast(vec_t{Lhs}), + static_cast(vec_t{Rhs})); + // bit_cast is needed to cast between char/signed char + // `ext_vector_type`s. + // + // TODO: Can we just change `vector_t`, or is that some mismatch + // between clang/SPIR-V? + return ResultVec{sycl::bit_cast(res)}; + } else { + auto vec_lhs = static_cast(vec_t{Lhs}); + auto vec_rhs = static_cast(vec_t{Rhs}); + auto res = [&]() { + if constexpr (std::is_same_v>) + return vec_lhs == vec_rhs; + else if constexpr (std::is_same_v>) + return vec_lhs != vec_rhs; + else if constexpr (std::is_same_v>) + return vec_lhs < vec_rhs; + else if constexpr (std::is_same_v>) + return vec_lhs > vec_rhs; + else if constexpr (std::is_same_v>) + return vec_lhs <= vec_rhs; + else if constexpr (std::is_same_v>) + return vec_lhs >= vec_rhs; + else + static_assert(!std::is_same_v, "Must be unreachable"); + }(); + // See the comment above. + return ResultVec{sycl::bit_cast(res)}; + } + } else { + return ResultVec{Op(static_cast(vec_t{Lhs}), + static_cast(vec_t{Rhs}))}; + } + } + } + + // Unary op: + template auto operator()(const T &X, OpTy &&Op) { + static_assert(is_vec_v); + constexpr bool is_logical = std::is_same_v>; + if constexpr (is_logical) { + vec, + T::size()> + tmp; + for (int i = 0; i < T::size(); ++i) + tmp[i] = Op(X[i]) ? -1 : 0; + return tmp; + } else if constexpr (is_host || + std::is_same_v) { + T tmp; + for (int i = 0; i < T::size(); ++i) + tmp[i] = Op(X[i]); + return tmp; + } else { + return T{Op(static_cast(X))}; + } + } +}; + +// In swizzles, depending on the constness of the underlying vector and if +// swizzle indices are repeating or not, opassign operators might not be +// available for an operation even if such an operator can be overloaded (e.g., +// `+`/`+=` vs `<`). +// +// While it's not the same in vec, we process vec mixins similarly to swizzles +// to unify the code, both between vec/swizzle, and between arithmetic/logical +// ops. + +template +class SwizzleOpMixin {}; + +template +class VecOpMixin {}; + +#define __SYCL_BINARY_OP_MIXIN(OP, BINOP) \ + template \ + class SwizzleOpMixin>> { \ + using element_type = typename from_incomplete::element_type; \ + static constexpr int N = from_incomplete::size(); \ + \ + public: \ + template && !is_swizzle_v>> \ + friend auto operator BINOP(const Self &lhs, const T &rhs) { \ + using Vec = vec; \ + return OP{}(Vec{lhs}, Vec{static_cast(rhs)}); \ + } \ + template && !is_swizzle_v>> \ + friend auto operator BINOP(const T &lhs, const Self &rhs) { \ + using Vec = vec; \ + return OP{}(Vec{static_cast(lhs)}, Vec{rhs}); \ + } \ + friend auto operator BINOP(const Self &lhs, \ + const vec &rhs) { \ + return OP{}(vec{lhs}, rhs); \ + } \ + friend auto operator BINOP(const vec &lhs, \ + const Self &rhs) { \ + return OP{}(lhs, vec{rhs}); \ + } \ + template >, \ + typename = std::enable_if_t< \ + std::is_same_v && \ + N == OtherSwizzle::size()>> \ + friend auto operator BINOP(const Self &lhs, const OtherSwizzle &rhs) { \ + using ResultVec = vec; \ + return OP{}(static_cast(lhs), static_cast(rhs)); \ + } \ + /* Can't have both (Self, Swizzle) and (Swizzle, Self) enabled at the \ + * same time if they use the same `const` as that would be ambiguous. As \ + * such, only enable the latter if "constness" differs. */ \ + template >, \ + typename = std::enable_if_t< \ + std::is_same_v && \ + N == OtherSwizzle::size() && \ + is_over_const_vec != is_over_const_vec>> \ + friend auto operator BINOP(const OtherSwizzle &lhs, const Self &rhs) { \ + using ResultVec = vec; \ + return OP{}(static_cast(lhs), static_cast(rhs)); \ + } \ + }; \ + template \ + class VecOpMixin>> { \ + using element_type = typename from_incomplete::element_type; \ + static constexpr int N = from_incomplete::size(); \ + \ + public: \ + template >> \ + friend auto operator BINOP(const Self &lhs, const T &rhs) { \ + return OP{}(lhs, Self{static_cast(rhs)}); \ + } \ + template >> \ + friend auto operator BINOP(const T &lhs, const Self &rhs) { \ + return OP{}(Self{static_cast(lhs)}, rhs); \ + } \ + friend auto operator BINOP(const Self &lhs, const Self &rhs) { \ + return VectorImpl{}(lhs, rhs, OP{}); \ + } \ + }; + +#define __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(OP, BINOP, OPASSIGN) \ + __SYCL_BINARY_OP_MIXIN(OP, BINOP) \ + template \ + class SwizzleOpMixin>> { \ + using element_type = typename from_incomplete::element_type; \ + static constexpr int N = from_incomplete::size(); \ + \ + public: \ + template && !is_swizzle_v>> \ + friend const Self &operator OPASSIGN(const Self & lhs, const T & rhs) { \ + lhs = OP{}(lhs, rhs); \ + return lhs; \ + } \ + friend const Self &operator OPASSIGN(const Self & lhs, \ + const vec &rhs) { \ + lhs = OP{}(lhs, rhs); \ + return lhs; \ + } \ + template >, \ + typename = std::enable_if_t< \ + std::is_same_v && \ + N == OtherSwizzle::size()>> \ + friend const Self &operator OPASSIGN(const Self & lhs, \ + const OtherSwizzle & rhs) { \ + lhs = OP{}(lhs, rhs); \ + return lhs; \ + } \ + }; \ + template \ + class VecOpMixin>> { \ + using element_type = typename from_incomplete::element_type; \ + static constexpr int N = from_incomplete::size(); \ + \ + public: \ + template >> \ + friend Self &operator OPASSIGN(Self & lhs, const T & rhs) { \ + lhs = OP{}(lhs, static_cast(rhs)); \ + return lhs; \ + } \ + friend Self &operator OPASSIGN(Self & lhs, const Self & rhs) { \ + lhs = OP{}(lhs, rhs); \ + return lhs; \ + } \ + }; + +// There is no "OpAssign" version of the unary operators overloads, use "false" +// directly. That will leave "true" version without partial specialization and +// would use default empty implementation. That is important, becuase we only +// want the "false" one to provide the implementation to avoid ambiguity. +#define __SYCL_UNARY_OP_MIXIN(OP, UOP) \ + template \ + class SwizzleOpMixin>> { \ + using element_type = typename from_incomplete::element_type; \ + static constexpr int N = from_incomplete::size(); \ + \ + public: \ + friend auto operator UOP(const Self &x) { \ + return OP{}(vec{x}); \ + } \ + }; \ + template \ + class VecOpMixin>> { \ + using element_type = typename from_incomplete::element_type; \ + static constexpr int N = from_incomplete::size(); \ + \ + public: \ + friend auto operator UOP(const Self &x) { return VectorImpl{}(x, OP{}); } \ + }; + +// clang-format off + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::plus , +, +=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::minus , -, -=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::multiplies , *, *=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::divides , /, /=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::modulus , %, %=) + + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::bit_and , &, &=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::bit_or , |, |=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::bit_xor , ^, ^=) + + __SYCL_BINARY_OP_MIXIN(std::equal_to , ==) + __SYCL_BINARY_OP_MIXIN(std::not_equal_to , !=) + __SYCL_BINARY_OP_MIXIN(std::less , <) + __SYCL_BINARY_OP_MIXIN(std::greater , >) + __SYCL_BINARY_OP_MIXIN(std::less_equal , <=) + __SYCL_BINARY_OP_MIXIN(std::greater_equal , >=) + + __SYCL_BINARY_OP_MIXIN(std::logical_and , &&) + __SYCL_BINARY_OP_MIXIN(std::logical_or , ||) + + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(ShiftLeft , <<, <<=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(ShiftRight , >>, >>=) + + __SYCL_UNARY_OP_MIXIN(std::negate , -) + __SYCL_UNARY_OP_MIXIN(std::logical_not , !) + __SYCL_UNARY_OP_MIXIN(std::bit_not , ~) + __SYCL_UNARY_OP_MIXIN(UnaryPlus , +) +// clang-format on + +#undef __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN +#undef __SYCL_BINARY_OP_MIXIN +#undef __SYCL_UNARY_OP_MIXIN + +// Now use individual per-operation mixins to create aggregated mixins that are +// easier to use. + +// clang-format off +#define __SYCL_COMBINE_OP_MIXINS(MIXIN_TEMPLATE, ...) \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::plus>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::minus>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::multiplies>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::divides>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::modulus>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::bit_and>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::bit_or>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::bit_xor>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::equal_to>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::not_equal_to>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::less>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::greater>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::less_equal>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::greater_equal>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::logical_and>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::logical_or>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, ShiftLeft>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, ShiftRight>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::negate>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::logical_not>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, std::bit_not>, \ + public MIXIN_TEMPLATE<__VA_ARGS__, UnaryPlus> +// clang-format on + +template +struct __SYCL_EBO SwizzleOpsMixin + : __SYCL_COMBINE_OP_MIXINS(SwizzleOpMixin, Self, EnableAssign) {}; + +template +struct __SYCL_EBO VecOpsMixin + : __SYCL_COMBINE_OP_MIXINS(VecOpMixin, Self, false), + __SYCL_COMBINE_OP_MIXINS(VecOpMixin, Self, true) {}; + +#undef __SYCL_COMBINE_OP_MIXINS + +// Mixins infrastructure above is complete, now use these shared (vec/swizzle) +// mixins to define swizzle class. + +template ::vector_t, +#endif + typename DataT = typename from_incomplete::element_type, + int N = from_incomplete::size()> +struct __SYCL_EBO VecConversionsMixin : +#ifdef __SYCL_DEVICE_ONLY__ + public detail::ConversionOperatorMixin< + Self, vector_t, ConversionOpType::conv_explicit, + // if `vector_t` and `DataT` are the same, then the `operator DataT` + // from the above is enough. + !std::is_same_v>, +#endif + public ConversionOperatorMixin, + public ConversionOperatorMixin< + Self, DataT, ConversionOpType::conv_explicit_template_convert, + /* Enable = */ (N == 1)> { +}; + +template > +struct __SYCL_EBO SwizzleMixins + : public NamedSwizzlesMixinConst::size()>, + public SwizzleOpsMixin, + public ByteShiftsNonAssignMixin, + // Same conversions as in sycl::vec of the same size as the produced + // swizzle. + public VecConversionsMixin, + // Conversion to sycl::vec, must be available only when `NumElements > 1` + // per the SYCL 2020 specification: + public ConversionOperatorMixin< + Self, + vec::element_type, + from_incomplete::size()>, + ConversionOpType::conv_regular, + /* Enable = */ true> {}; + +template +struct __SYCL_EBO SwizzleMixins + : public SwizzleMixins, + public SwizzleOpsMixin, + public IncDecMixin, + public ByteShiftsOpAssignMixin {}; + +template +inline constexpr bool has_repeating_indexes = []() constexpr { + int Idxs[] = {Indexes...}; + for (std::size_t i = 1; i < sizeof...(Indexes); ++i) { + for (std::size_t j = 0; j < i; ++j) + if (Idxs[j] == Idxs[i]) + // Repeating index + return true; + } + + return false; +}(); + +template class SwizzleBase { + using DataT = typename from_incomplete::element_type; + using VecT = + std::conditional_t, const vec, + vec>; + +public: + explicit SwizzleBase(VecT &Vec) : Vec(Vec) {} + + const Self &operator=(const Self &) = delete; + +protected: + VecT &Vec; +}; + +template +class SwizzleBase>> { + using DataT = typename from_incomplete::element_type; + using VecT = + std::conditional_t, const vec, + vec>; + static constexpr int N = from_incomplete::size(); + +public: + explicit SwizzleBase(VecT &Vec) : Vec(Vec) {} + + template + void load(size_t offset, + multi_ptr ptr) const { + vec v; + v.load(offset, ptr); + *static_cast(this) = v; + } + + template + std::enable_if_t + operator=(const Swizzle + &rhs) { + return (*this = static_cast>(rhs)); + } + + const Self &operator=(const vec &rhs) const { + for (int i = 0; i < N; ++i) + (*static_cast(this))[i] = rhs[i]; + + return *static_cast(this); + } + + template && + !is_swizzle_v>> + const Self &operator=(const T &rhs) const { + for (int i = 0; i < N; ++i) + (*static_cast(this))[i] = static_cast(rhs); + + return *static_cast(this); + } + + // Default copy-assignment. Self's implicitly generated copy-assignment uses + // this. + // + // We're templated on "Self", so each Swizzle has its own SwizzleBase and the + // following is ok (1-to-1 bidirectional mapping between Self and its + // SwizzleBase instantiation) even if a bit counterintuitive. + const SwizzleBase &operator=(const SwizzleBase &rhs) const { + const Self &self = (*static_cast(this)); + self = static_cast>(static_cast(rhs)); + return self; + } + +protected: + VecT &Vec; +}; + +// Can't have sycl::vec anywhere in template parameters because that would bring +// its hidden friends into ADL. +template +class __SYCL_EBO Swizzle + : public SwizzleBase, + VecSize>, + public SwizzleMixins> { + using Base = + SwizzleBase, VecSize>; + static constexpr int NumElements = sizeof...(Indexes); + using ResultVec = vec; + + // Get underlying vec index for (*this)[idx] access. + static constexpr auto get_vec_idx(int idx) { + int counter = 0; + int result = -1; + ((result = counter++ == idx ? Indexes : result), ...); + return result; + } + +#ifdef __SYCL_DEVICE_ONLY__ +public: + using vector_t = typename vec::vector_t; + +private: +#endif // __SYCL_DEVICE_ONLY__ + + // This mixin calls `convertOperatorImpl` below so has to be a friend. + template + friend struct ConversionOperatorMixin; + + template To convertOperatorImpl() const { + if constexpr (std::is_same_v && NumElements == 1) { + return (*this)[0]; + } else if constexpr (std::is_same_v) { + return ResultVec{this->Vec[Indexes]...}; +#ifdef __SYCL_DEVICE_ONLY__ + } else if constexpr (std::is_same_v) { + // operator ResultVec() isn't available for single-element swizzle, create + // sycl::vec explicitly here. + return static_cast(ResultVec{this->Vec[Indexes]...}); +#endif + } else { + static_assert(is_explicitly_convertible_to_v && + NumElements == 1); + return static_cast((*this)[0]); + } + } + +public: + using Base::Base; + using Base::operator=; + + using element_type = DataT; + using value_type = DataT; + + Swizzle() = delete; + Swizzle(const Swizzle &) = delete; + + static constexpr size_t byte_size() noexcept { + return ResultVec::byte_size(); + } + static constexpr size_t size() noexcept { return ResultVec::size(); } + + __SYCL2020_DEPRECATED( + "get_size() is deprecated, please use byte_size() instead") + size_t get_size() const { return static_cast(*this).get_size(); } + + __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead") + size_t get_count() const { + return static_cast(*this).get_count(); + }; + + template + vec convert() const { + return static_cast(*this) + .template convert(); + } + + template asT as() const { + return static_cast(*this).template as(); + } + + template + void store(size_t offset, + multi_ptr ptr) const { + return static_cast(*this).store(offset, ptr); + } + + template auto swizzle() const { + return this->Vec.template swizzle(); + } + + auto &operator[](int index) const { return this->Vec[get_vec_idx(index)]; } +}; } // namespace detail ///////////////////////// class sycl::vec ///////////////////////// @@ -128,9 +1422,13 @@ struct ScalarConversionOperatorMixIn> { // SYCL devices as well as in host C++ code. template class __SYCL_EBO vec - : public detail::vec_arith, - public detail::ScalarConversionOperatorMixIn, - DataT, NumElements> { + : public detail::VecConversionsMixin>, + public detail::IncDecMixin>, + public detail::ByteShiftsNonAssignMixin>, + public detail::ByteShiftsOpAssignMixin>, + public detail::VecOpsMixin>, + public detail::NamedSwizzlesMixinBoth, + NumElements> { static_assert(NumElements == 1 || NumElements == 2 || NumElements == 3 || NumElements == 4 || NumElements == 8 || NumElements == 16, @@ -147,56 +1445,40 @@ class __SYCL_EBO vec using DataType = std::array; #ifdef __SYCL_DEVICE_ONLY__ - using element_type_for_vector_t = typename detail::map_type< - DataT, -#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - std::byte, /*->*/ std::uint8_t, // -#endif - bool, /*->*/ std::uint8_t, // - sycl::half, /*->*/ sycl::detail::half_impl::StorageT, // - sycl::ext::oneapi::bfloat16, - /*->*/ sycl::ext::oneapi::detail::Bfloat16StorageT, // - char, /*->*/ detail::ConvertToOpenCLType_t, // - DataT, /*->*/ DataT // - >::type; - public: - // Type used for passing sycl::vec to SPIRV builtins. - // We can not use ext_vector_type(1) as it's not supported by SPIRV - // plugins (CTS fails). - using vector_t = - typename std::conditional_t; + using vector_t = detail::vector_t; private: #endif // __SYCL_DEVICE_ONLY__ - static constexpr int getNumElements() { return NumElements; } - - // SizeChecker is needed for vec(const argTN &... args) ctor to validate args. - template - struct SizeChecker : std::conditional_t {}; + template + friend struct detail::ConversionOperatorMixin; - template - struct SizeChecker - : std::conditional_t, - std::false_type> {}; + template To convertOperatorImpl() const { + if constexpr (std::is_same_v && NumElements == 1) { + return m_Data[0]; +#ifdef __SYCL_DEVICE_ONLY__ + } else if constexpr (std::is_same_v) { + /* @SYCL2020 + * Available only when: compiled for the device. + * Converts this SYCL vec instance to the underlying backend-native vector + * type defined by vector_t. + */ + return sycl::bit_cast(m_Data); +#endif + } else { + static_assert(detail::is_explicitly_convertible_to_v && + NumElements == 1); + return static_cast((*this)[0]); + } + } // Utility trait for creating an std::array from an vector argument. template class FlattenVecArg { template static constexpr auto helper(const T &V, std::index_sequence) { - // FIXME: Swizzle's `operator[]` for expression trees seems to be broken - // and returns values of the underlying vector of some of the operands. On - // the other hand, `getValue()` gives correct results. This can be changed - // to using `operator[]` once the bug is fixed. - if constexpr (detail::is_swizzle_v) - return std::array{static_cast(V.getValue(Is))...}; - else - return std::array{static_cast(V[Is])...}; + return std::array{static_cast(V[Is])...}; } public: @@ -215,24 +1497,15 @@ class __SYCL_EBO vec detail::ArrayCreator; template - using Swizzle = - detail::SwizzleOp, detail::GetOp, - detail::GetOp, Indexes...>; - + using Swizzle = detail::Swizzle; template - using ConstSwizzle = - detail::SwizzleOp, detail::GetOp, - detail::GetOp, Indexes...>; + using ConstSwizzle = detail::Swizzle; // Shortcuts for args validation in vec(const argTN &... args) ctor. template static constexpr bool AllowArgTypeInVariadicCtor = []() constexpr { - // FIXME: This logic implements the behavior of the previous implementation. if constexpr (detail::is_vec_or_swizzle_v) { - if constexpr (CtorArgTy::size() == 1) - return std::is_convertible_v; - else - return std::is_same_v; + return std::is_convertible_v; } else { return std::is_convertible_v; } @@ -260,23 +1533,38 @@ class __SYCL_EBO vec constexpr vec(vec &&Rhs) = default; private: - // Implementation detail for the next public ctor. - template - constexpr vec(const std::array &Arr, - std::index_sequence) + // Implementation detail for the next public ctor. Note that for 3-elements + // vector created from vector_t we use 4-elements array, potentially ignoring + // the last padding element. + template + constexpr vec(const Container &Arr, std::index_sequence) : m_Data{Arr[Is]...} {} + template struct type_identity { + using type = T; + }; + public: + // Explicit because replication isn't an obvious conversion. + template 1)>> explicit constexpr vec(const DataT &arg) : vec{detail::RepeatValue(arg), std::make_index_sequence()} {} + // Extra `void` to make this really different from the previous for the C++ + // compiler. + template , typename = void> + constexpr vec(const DataT &arg) + : vec{detail::RepeatValue(arg), + std::make_index_sequence()} {} + // Constructor from values of base type or vec of base type. Checks that // base types are match and that the NumElements == sum of lengths of args. - template && ...)) && - ((num_elements() + ...)) == NumElements>> + template < + typename... argTN, + typename = std::enable_if_t< + (NumElements > 1 && ((AllowArgTypeInVariadicCtor && ...)) && + ((num_elements() + ...)) == NumElements)>> constexpr vec(const argTN &...args) : vec{VecArgArrayCreator::Create(args...), std::make_index_sequence()} {} @@ -284,46 +1572,26 @@ class __SYCL_EBO vec /****************** Assignment Operators **************/ constexpr vec &operator=(const vec &Rhs) = default; - // Template required to prevent ambiguous overload with the copy assignment - // when NumElements == 1. The template prevents implicit conversion from - // vec<_, 1> to DataT. - template - typename std::enable_if_t< - std::is_fundamental_v || - detail::is_half_or_bf16_v>, - vec &> - operator=(const DataT &Rhs) { - *this = vec{Rhs}; - return *this; - } - - // W/o this, things like "vec = vec" doesn't work. - template - typename std::enable_if_t< - !std::is_same_v && std::is_convertible_v, vec &> - operator=(const vec &Rhs) { - *this = Rhs.template as(); + template >> + vec &operator=(const T &Rhs) { + *this = vec{static_cast(Rhs)}; return *this; } #ifdef __SYCL_DEVICE_ONLY__ - // Make it a template to avoid ambiguity with `vec(const DataT &)` when - // `vector_t` is the same as `DataT`. Not that the other ctor isn't a template - // so we don't even need a smart `enable_if` condition here, the mere fact of - // this being a template makes the other ctor preferred. - template < - typename vector_t_ = vector_t, - typename = typename std::enable_if_t>> - constexpr vec(vector_t_ openclVector) { - m_Data = sycl::bit_cast(openclVector); - } - - /* @SYCL2020 - * Available only when: compiled for the device. - * Converts this SYCL vec instance to the underlying backend-native vector - * type defined by vector_t. - */ - operator vector_t() const { return sycl::bit_cast(m_Data); } + public: + template >> + // TODO: current draft would use non-template `vector_t` as an operand, + // causing sycl::vec{1} to go through different paths on + // host/device, open question in the specification. + explicit vec(vector_t openclVector) + // FIXME: Doesn't work when instantiated for 3-elements vectors, + // indetermined padding can't be used to initialize constexpr std::array + // storage. + : vec(bit_cast(openclVector), + std::make_index_sequence()) {} #endif // __SYCL_DEVICE_ONLY__ __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead") @@ -334,27 +1602,6 @@ class __SYCL_EBO vec static constexpr size_t get_size() { return byte_size(); } static constexpr size_t byte_size() noexcept { return sizeof(m_Data); } -private: - // getValue should be able to operate on different underlying - // types: enum cl_float#N , builtin vector float#N, builtin type float. - constexpr auto getValue(int Index) const { - using RetType = - typename std::conditional_t, int8_t, -#ifdef __SYCL_DEVICE_ONLY__ - element_type_for_vector_t -#else - DataT -#endif - >; - -#ifdef __SYCL_DEVICE_ONLY__ - if constexpr (std::is_same_v) - return sycl::bit_cast(m_Data[Index]); - else -#endif - return static_cast(m_Data[Index]); - } - public: // Out-of-class definition is in `sycl/detail/vector_convert.hpp` template asT as() const { return sycl::bit_cast(*this); } - template Swizzle swizzle() { - return this; +private: + static constexpr bool one_elem_swizzle_return_scalar = false; + +public: + template + std::conditional_t> + swizzle() { + if constexpr (sizeof...(SwizzleIndexes) == 1 && + one_elem_swizzle_return_scalar) + return this->operator[](SwizzleIndexes...); + else + return Swizzle{*this}; } template - ConstSwizzle swizzle() const { - return this; + std::conditional_t> + swizzle() const { + if constexpr (sizeof...(SwizzleIndexes) == 1 && + one_elem_swizzle_return_scalar) + return this->operator[](SwizzleIndexes...); + else + return ConstSwizzle{*this}; } const DataT &operator[](int i) const { return m_Data[i]; } DataT &operator[](int i) { return m_Data[i]; } - // Begin hi/lo, even/odd, xyzw, and rgba swizzles. @{ -private: - // Indexer used in the swizzles.def - // Currently it is defined as a template struct. Replacing it with a constexpr - // function would activate a bug in MSVC that is fixed only in v19.20. - // Until then MSVC does not recognize such constexpr functions as const and - // thus does not let using them in template parameters inside swizzle.def. - template struct Indexer { - static constexpr int value = Index; - }; - -public: -#ifdef __SYCL_ACCESS_RETURN -#error "Undefine __SYCL_ACCESS_RETURN macro" -#endif -#define __SYCL_ACCESS_RETURN this -#include "swizzles.def" -#undef __SYCL_ACCESS_RETURN - // }@ End of hi/lo, even/odd, xyzw, and rgba swizzles. - template void load(size_t Offset, multi_ptr Ptr) { for (int I = 0; I < NumElements; I++) { @@ -457,16 +1703,12 @@ class __SYCL_EBO vec // the element type in bytes multiplied by the number of elements." static constexpr int alignment = (std::min)((size_t)64, sizeof(DataType)); alignas(alignment) DataType m_Data; - // friends - template class T4, - int... T5> - friend class detail::SwizzleOp; template friend class __SYCL_EBO vec; - // To allow arithmetic operators access private members of vec. - template friend class detail::vec_arith; - template friend class detail::vec_arith_common; }; + +static_assert(sizeof(vec) == 2 * sizeof(int), + "Empty Bases Optimization didn't work!"); ///////////////////////// class sycl::vec ///////////////////////// #ifdef __cpp_deduction_guides @@ -476,845 +1718,5 @@ template vec; #endif -namespace detail { - -// Special type for working SwizzleOp with scalars, stores a scalar and gives -// the scalar at any index. Provides interface is compatible with SwizzleOp -// operations -template class GetScalarOp { -public: - using DataT = T; - GetScalarOp(DataT Data) : m_Data(Data) {} - DataT getValue(size_t) const { return m_Data; } - -private: - DataT m_Data; -}; -template -using rel_t = detail::select_cl_scalar_integral_signed_t; - -template struct EqualTo { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs == Rhs) ? -1 : 0; - } -}; - -template struct NotEqualTo { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs != Rhs) ? -1 : 0; - } -}; - -template struct GreaterEqualTo { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs >= Rhs) ? -1 : 0; - } -}; - -template struct LessEqualTo { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs <= Rhs) ? -1 : 0; - } -}; - -template struct GreaterThan { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs > Rhs) ? -1 : 0; - } -}; - -template struct LessThan { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs < Rhs) ? -1 : 0; - } -}; - -template struct LogicalAnd { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs && Rhs) ? -1 : 0; - } -}; - -template struct LogicalOr { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs || Rhs) ? -1 : 0; - } -}; - -template struct RShift { - constexpr T operator()(const T &Lhs, const T &Rhs) const { - return Lhs >> Rhs; - } -}; - -template struct LShift { - constexpr T operator()(const T &Lhs, const T &Rhs) const { - return Lhs << Rhs; - } -}; - -///////////////////////// class SwizzleOp ///////////////////////// -// SwizzleOP represents expression templates that operate on vec. -// Actual computation performed on conversion or assignment operators. -template class OperationCurrentT, int... Indexes> -class SwizzleOp { - using DataT = typename VecT::element_type; - // Certain operators return a vector with a different element type. Also, the - // left and right operand types may differ. CommonDataT selects a result type - // based on these types to ensure that the result value can be represented. - // - // Example 1: - // sycl::vec vec{...}; - // auto result = 300u + vec.x(); - // - // CommonDataT is std::common_type_t since - // it's larger than unsigned char. - // - // Example 2: - // sycl::vec vec{...}; - // auto result = vec.template swizzle() && vec; - // - // CommonDataT is DataT since operator&& returns a vector with element type - // int8_t, which is larger than bool. - // - // Example 3: - // sycl::vec vec{...}; auto swlo = vec.lo(); - // auto result = swlo == swlo; - // - // CommonDataT is DataT since operator== returns a vector with element type - // int8_t, which is the same size as std::byte. std::common_type_t - // can't be used here since there's no type that int8_t and std::byte can both - // be implicitly converted to. - using OpLeftDataT = typename OperationLeftT::DataT; - using OpRightDataT = typename OperationRightT::DataT; - using CommonDataT = std::conditional_t< - sizeof(DataT) >= sizeof(std::common_type_t), - DataT, std::common_type_t>; - static constexpr int getNumElements() { return sizeof...(Indexes); } - - using rel_t = detail::rel_t; - using vec_t = vec; - using vec_rel_t = vec; - - template class OperationCurrentT_, int... Idx_> - using NewLHOp = SwizzleOp, - OperationRightT_, OperationCurrentT_, Idx_...>; - - template class OperationCurrentT_, int... Idx_> - using NewRelOp = SwizzleOp, - SwizzleOp, - OperationRightT_, OperationCurrentT_, Idx_...>; - - template class OperationCurrentT_, int... Idx_> - using NewRHOp = SwizzleOp, - OperationCurrentT_, Idx_...>; - - template - using EnableIfOneIndex = typename std::enable_if_t< - 1 == IdxNum && SwizzleOp::getNumElements() == IdxNum, T>; - - template - using EnableIfMultipleIndexes = typename std::enable_if_t< - 1 != IdxNum && SwizzleOp::getNumElements() == IdxNum, T>; - - template - using EnableIfScalarType = typename std::enable_if_t< - std::is_convertible_v && - (std::is_fundamental_v || - detail::is_half_or_bf16_v>)>; - - template - using EnableIfNoScalarType = typename std::enable_if_t< - !std::is_convertible_v || - !(std::is_fundamental_v || - detail::is_half_or_bf16_v>)>; - - template - using Swizzle = - SwizzleOp, GetOp, GetOp, Indices...>; - - template - using ConstSwizzle = - SwizzleOp, GetOp, GetOp, Indices...>; - -public: - using element_type = DataT; - using value_type = DataT; - -#ifdef __SYCL_DEVICE_ONLY__ - using vector_t = typename vec_t::vector_t; -#endif // __SYCL_DEVICE_ONLY__ - - const DataT &operator[](int i) const { - std::array Idxs{Indexes...}; - return (*m_Vector)[Idxs[i]]; - } - - template - std::enable_if_t, DataT> &operator[](int i) { - std::array Idxs{Indexes...}; - return (*m_Vector)[Idxs[i]]; - } - - __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead") - size_t get_count() const { return size(); } - static constexpr size_t size() noexcept { return getNumElements(); } - - template - __SYCL2020_DEPRECATED( - "get_size() is deprecated, please use byte_size() instead") - size_t get_size() const { - return byte_size(); - } - - template size_t byte_size() const noexcept { - return sizeof(DataT) * (Num == 3 ? 4 : Num); - } - - template , - typename = EnableIfScalarType> - operator T() const { - return getValue(0); - } - - template > - friend NewRHOp, std::multiplies, Indexes...> - operator*(const T &Lhs, const SwizzleOp &Rhs) { - return NewRHOp, std::multiplies, Indexes...>( - Rhs.m_Vector, GetScalarOp(Lhs), Rhs); - } - - template > - friend NewRHOp, std::plus, Indexes...> - operator+(const T &Lhs, const SwizzleOp &Rhs) { - return NewRHOp, std::plus, Indexes...>( - Rhs.m_Vector, GetScalarOp(Lhs), Rhs); - } - - template > - friend NewRHOp, std::divides, Indexes...> - operator/(const T &Lhs, const SwizzleOp &Rhs) { - return NewRHOp, std::divides, Indexes...>( - Rhs.m_Vector, GetScalarOp(Lhs), Rhs); - } - - // TODO: Check that Rhs arg is suitable. -#ifdef __SYCL_OPASSIGN -#error "Undefine __SYCL_OPASSIGN macro." -#endif -#define __SYCL_OPASSIGN(OPASSIGN, OP) \ - friend const SwizzleOp &operator OPASSIGN(const SwizzleOp & Lhs, \ - const DataT & Rhs) { \ - Lhs.operatorHelper(vec_t(Rhs)); \ - return Lhs; \ - } \ - template \ - friend const SwizzleOp &operator OPASSIGN(const SwizzleOp & Lhs, \ - const RhsOperation & Rhs) { \ - Lhs.operatorHelper(Rhs); \ - return Lhs; \ - } \ - friend const SwizzleOp &operator OPASSIGN(const SwizzleOp & Lhs, \ - const vec_t & Rhs) { \ - Lhs.operatorHelper(Rhs); \ - return Lhs; \ - } - - __SYCL_OPASSIGN(+=, std::plus) - __SYCL_OPASSIGN(-=, std::minus) - __SYCL_OPASSIGN(*=, std::multiplies) - __SYCL_OPASSIGN(/=, std::divides) - __SYCL_OPASSIGN(%=, std::modulus) - __SYCL_OPASSIGN(&=, std::bit_and) - __SYCL_OPASSIGN(|=, std::bit_or) - __SYCL_OPASSIGN(^=, std::bit_xor) - __SYCL_OPASSIGN(>>=, RShift) - __SYCL_OPASSIGN(<<=, LShift) -#undef __SYCL_OPASSIGN - -#ifdef __SYCL_UOP -#error "Undefine __SYCL_UOP macro" -#endif -#define __SYCL_UOP(UOP, OPASSIGN) \ - friend const SwizzleOp &operator UOP(const SwizzleOp & sv) { \ - sv OPASSIGN static_cast(1); \ - return sv; \ - } \ - friend vec_t operator UOP(const SwizzleOp &sv, int) { \ - vec_t Ret = sv; \ - sv OPASSIGN static_cast(1); \ - return Ret; \ - } - - __SYCL_UOP(++, +=) - __SYCL_UOP(--, -=) -#undef __SYCL_UOP - - template - friend typename std::enable_if_t< - std::is_same_v && !detail::is_vgenfloat_v, vec_t> - operator~(const SwizzleOp &Rhs) { - vec_t Tmp = Rhs; - return ~Tmp; - } - - friend vec_rel_t operator!(const SwizzleOp &Rhs) { - vec_t Tmp = Rhs; - return !Tmp; - } - - friend vec_t operator+(const SwizzleOp &Rhs) { - vec_t Tmp = Rhs; - return +Tmp; - } - - friend vec_t operator-(const SwizzleOp &Rhs) { - vec_t Tmp = Rhs; - return -Tmp; - } - -// scalar BINOP vec<> -// scalar BINOP SwizzleOp -// vec<> BINOP SwizzleOp -#ifdef __SYCL_BINOP -#error "Undefine __SYCL_BINOP macro" -#endif -#define __SYCL_BINOP(BINOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP( \ - const DataT & Lhs, const SwizzleOp & Rhs) { \ - vec_t Tmp = Rhs; \ - return Lhs BINOP Tmp; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const SwizzleOp & Lhs, \ - const DataT & Rhs) { \ - vec_t Tmp = Lhs; \ - return Tmp BINOP Rhs; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP( \ - const vec_t & Lhs, const SwizzleOp & Rhs) { \ - vec_t Tmp = Rhs; \ - return Lhs BINOP Tmp; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const SwizzleOp & Lhs, \ - const vec_t & Rhs) { \ - vec_t Tmp = Lhs; \ - return Tmp BINOP Rhs; \ - } - - __SYCL_BINOP(+, (!detail::is_byte_v)) - __SYCL_BINOP(-, (!detail::is_byte_v)) - __SYCL_BINOP(*, (!detail::is_byte_v)) - __SYCL_BINOP(/, (!detail::is_byte_v)) - __SYCL_BINOP(%, (!detail::is_byte_v)) - __SYCL_BINOP(&, true) - __SYCL_BINOP(|, true) - __SYCL_BINOP(^, true) - // We have special <<, >> operators for std::byte. - __SYCL_BINOP(>>, (!detail::is_byte_v)) - __SYCL_BINOP(<<, (!detail::is_byte_v)) - - template - friend std::enable_if_t, vec_t> - operator>>(const SwizzleOp &Lhs, const int shift) { - vec_t Tmp = Lhs; - return Tmp >> shift; - } - - template - friend std::enable_if_t, vec_t> - operator<<(const SwizzleOp &Lhs, const int shift) { - vec_t Tmp = Lhs; - return Tmp << shift; - } -#undef __SYCL_BINOP - -// scalar RELLOGOP vec<> -// scalar RELLOGOP SwizzleOp -// vec<> RELLOGOP SwizzleOp -#ifdef __SYCL_RELLOGOP -#error "Undefine __SYCL_RELLOGOP macro" -#endif -#define __SYCL_RELLOGOP(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ - const DataT & Lhs, const SwizzleOp & Rhs) { \ - vec_t Tmp = Rhs; \ - return Lhs RELLOGOP Tmp; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ - const SwizzleOp & Lhs, const DataT & Rhs) { \ - vec_t Tmp = Lhs; \ - return Tmp RELLOGOP Rhs; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ - const vec_t & Lhs, const SwizzleOp & Rhs) { \ - vec_t Tmp = Rhs; \ - return Lhs RELLOGOP Tmp; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ - const SwizzleOp & Lhs, const vec_t & Rhs) { \ - vec_t Tmp = Lhs; \ - return Tmp RELLOGOP Rhs; \ - } - - __SYCL_RELLOGOP(==, (!detail::is_byte_v)) - __SYCL_RELLOGOP(!=, (!detail::is_byte_v)) - __SYCL_RELLOGOP(>, (!detail::is_byte_v)) - __SYCL_RELLOGOP(<, (!detail::is_byte_v)) - __SYCL_RELLOGOP(>=, (!detail::is_byte_v)) - __SYCL_RELLOGOP(<=, (!detail::is_byte_v)) - __SYCL_RELLOGOP(&&, (!detail::is_byte_v && !detail::is_vgenfloat_v)) - __SYCL_RELLOGOP(||, (!detail::is_byte_v && !detail::is_vgenfloat_v)) -#undef __SYCL_RELLOGOP - - template > - SwizzleOp &operator=(const vec &Rhs) { - std::array Idxs{Indexes...}; - for (size_t I = 0; I < Idxs.size(); ++I) { - (*m_Vector)[Idxs[I]] = Rhs[I]; - } - return *this; - } - - template > - SwizzleOp &operator=(const DataT &Rhs) { - std::array Idxs{Indexes...}; - (*m_Vector)[Idxs[0]] = Rhs; - return *this; - } - - template = true> - SwizzleOp &operator=(const DataT &Rhs) { - std::array Idxs{Indexes...}; - for (auto Idx : Idxs) { - (*m_Vector)[Idx] = Rhs; - } - return *this; - } - - template > - SwizzleOp &operator=(DataT &&Rhs) { - std::array Idxs{Indexes...}; - (*m_Vector)[Idxs[0]] = Rhs; - return *this; - } - - template > - NewLHOp, std::multiplies, Indexes...> - operator*(const T &Rhs) const { - return NewLHOp, std::multiplies, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator*(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, std::plus, Indexes...> operator+(const T &Rhs) const { - return NewLHOp, std::plus, Indexes...>(m_Vector, *this, - GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator+(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template > - NewLHOp, std::minus, Indexes...> - operator-(const T &Rhs) const { - return NewLHOp, std::minus, Indexes...>(m_Vector, *this, - GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator-(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template > - NewLHOp, std::divides, Indexes...> - operator/(const T &Rhs) const { - return NewLHOp, std::divides, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator/(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, std::modulus, Indexes...> - operator%(const T &Rhs) const { - return NewLHOp, std::modulus, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator%(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, std::bit_and, Indexes...> - operator&(const T &Rhs) const { - return NewLHOp, std::bit_and, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator&(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, std::bit_or, Indexes...> - operator|(const T &Rhs) const { - return NewLHOp, std::bit_or, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator|(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template > - NewLHOp, std::bit_xor, Indexes...> - operator^(const T &Rhs) const { - return NewLHOp, std::bit_xor, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator^(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, RShift, Indexes...> operator>>(const T &Rhs) const { - return NewLHOp, RShift, Indexes...>(m_Vector, *this, - GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator>>(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template > - NewLHOp, LShift, Indexes...> operator<<(const T &Rhs) const { - return NewLHOp, LShift, Indexes...>(m_Vector, *this, - GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator<<(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template < - typename T1, typename T2, typename T3, template class T4, - int... T5, - typename = typename std::enable_if_t> - SwizzleOp &operator=(const SwizzleOp &Rhs) { - std::array Idxs{Indexes...}; - for (size_t I = 0; I < Idxs.size(); ++I) { - (*m_Vector)[Idxs[I]] = Rhs.getValue(I); - } - return *this; - } - - template < - typename T1, typename T2, typename T3, template class T4, - int... T5, - typename = typename std::enable_if_t> - SwizzleOp &operator=(SwizzleOp &&Rhs) { - std::array Idxs{Indexes...}; - for (size_t I = 0; I < Idxs.size(); ++I) { - (*m_Vector)[Idxs[I]] = Rhs.getValue(I); - } - return *this; - } - - template > - NewRelOp, EqualTo, Indexes...> operator==(const T &Rhs) const { - return NewRelOp, EqualTo, Indexes...>(NULL, *this, - GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator==(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, NotEqualTo, Indexes...> - operator!=(const T &Rhs) const { - return NewRelOp, NotEqualTo, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator!=(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, GreaterEqualTo, Indexes...> - operator>=(const T &Rhs) const { - return NewRelOp, GreaterEqualTo, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator>=(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, LessEqualTo, Indexes...> - operator<=(const T &Rhs) const { - return NewRelOp, LessEqualTo, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator<=(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, GreaterThan, Indexes...> - operator>(const T &Rhs) const { - return NewRelOp, GreaterThan, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator>(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, LessThan, Indexes...> operator<(const T &Rhs) const { - return NewRelOp, LessThan, Indexes...>(NULL, *this, - GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator<(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, LogicalAnd, Indexes...> - operator&&(const T &Rhs) const { - return NewRelOp, LogicalAnd, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator&&(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, LogicalOr, Indexes...> - operator||(const T &Rhs) const { - return NewRelOp, LogicalOr, Indexes...>(NULL, *this, - GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator||(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - // Begin hi/lo, even/odd, xyzw, and rgba swizzles. -private: - // Indexer used in the swizzles.def. - // Currently it is defined as a template struct. Replacing it with a constexpr - // function would activate a bug in MSVC that is fixed only in v19.20. - // Until then MSVC does not recognize such constexpr functions as const and - // thus does not let using them in template parameters inside swizzle.def. - template struct Indexer { - static constexpr int IDXs[sizeof...(Indexes)] = {Indexes...}; - static constexpr int value = IDXs[Index >= getNumElements() ? 0 : Index]; - }; - -public: -#ifdef __SYCL_ACCESS_RETURN -#error "Undefine __SYCL_ACCESS_RETURN macro" -#endif -#define __SYCL_ACCESS_RETURN m_Vector -#include "swizzles.def" -#undef __SYCL_ACCESS_RETURN - // End of hi/lo, even/odd, xyzw, and rgba swizzles. - - // Leave store() interface to automatic conversion to vec<>. - // Load to vec_t and then assign to swizzle. - template - void load(size_t offset, multi_ptr ptr) { - vec_t Tmp; - Tmp.load(offset, ptr); - *this = Tmp; - } - - template - vec convert() const { - // First materialize the swizzle to vec_t and then apply convert() to it. - vec_t Tmp; - std::array Idxs{Indexes...}; - for (size_t I = 0; I < Idxs.size(); ++I) { - Tmp[I] = (*m_Vector)[Idxs[I]]; - } - return Tmp.template convert(); - } - - template asT as() const { - // First materialize the swizzle to vec_t and then apply as() to it. - vec_t Tmp = *this; - static_assert((sizeof(Tmp) == sizeof(asT)), - "The new SYCL vec type must have the same storage size in " - "bytes as this SYCL swizzled vec"); - static_assert( - detail::is_contained::value || - detail::is_contained::value, - "asT must be SYCL vec of a different element type and " - "number of elements specified by asT"); - return Tmp.template as(); - } - -private: - SwizzleOp(const SwizzleOp &Rhs) - : m_Vector(Rhs.m_Vector), m_LeftOperation(Rhs.m_LeftOperation), - m_RightOperation(Rhs.m_RightOperation) {} - - SwizzleOp(VecT *Vector, OperationLeftT LeftOperation, - OperationRightT RightOperation) - : m_Vector(Vector), m_LeftOperation(LeftOperation), - m_RightOperation(RightOperation) {} - - SwizzleOp(VecT *Vector) : m_Vector(Vector) {} - - SwizzleOp(SwizzleOp &&Rhs) - : m_Vector(Rhs.m_Vector), m_LeftOperation(std::move(Rhs.m_LeftOperation)), - m_RightOperation(std::move(Rhs.m_RightOperation)) {} - - // Either performing CurrentOperation on results of left and right operands - // or reading values from actual vector. Perform implicit type conversion when - // the number of elements == 1 - - template - CommonDataT getValue(EnableIfOneIndex Index) const { - if (std::is_same_v, GetOp>) { - std::array Idxs{Indexes...}; - return (*m_Vector)[Idxs[Index]]; - } - auto Op = OperationCurrentT(); - return Op(m_LeftOperation.getValue(Index), - m_RightOperation.getValue(Index)); - } - - template - DataT getValue(EnableIfMultipleIndexes Index) const { - if (std::is_same_v, GetOp>) { - std::array Idxs{Indexes...}; - return (*m_Vector)[Idxs[Index]]; - } - auto Op = OperationCurrentT(); - return Op(m_LeftOperation.getValue(Index), - m_RightOperation.getValue(Index)); - } - - template