From 1617c6c7ff109c1570b2f291be4289fcb23ca60e Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 25 Jul 2024 15:12:10 -0700 Subject: [PATCH 01/78] [SYCL] Refactor vec/swizzle implementations, remove expression trees --- sycl/include/sycl/builtins_utils_vec.hpp | 40 +- .../sycl/detail/named_swizzles_mixin.hpp | 836 ++++++++ sycl/include/sycl/detail/type_traits.hpp | 28 +- sycl/include/sycl/detail/vector_arith.hpp | 394 ---- sycl/include/sycl/stream.hpp | 14 +- sycl/include/sycl/swizzles.def | 823 -------- sycl/include/sycl/vector.hpp | 1680 ++++++++--------- .../vector/vector_bf16_builtins.cpp | 10 +- .../vector/vector_convert_bfloat.cpp | 14 +- .../vector/vector_math_ops.cpp | 588 ++++-- 10 files changed, 2006 insertions(+), 2421 deletions(-) create mode 100644 sycl/include/sycl/detail/named_swizzles_mixin.hpp delete mode 100644 sycl/include/sycl/detail/vector_arith.hpp delete mode 100644 sycl/include/sycl/swizzles.def diff --git a/sycl/include/sycl/builtins_utils_vec.hpp b/sycl/include/sycl/builtins_utils_vec.hpp index eeaff9450b031..facde02be3b3f 100644 --- a/sycl/include/sycl/builtins_utils_vec.hpp +++ b/sycl/include/sycl/builtins_utils_vec.hpp @@ -25,12 +25,8 @@ struct is_valid_elem_type, Ts...> template struct is_valid_elem_type, Ts...> : std::bool_constant> {}; -template class OperationCurrentT, int... Indexes, - typename... Ts> -struct is_valid_elem_type, - Ts...> +template +struct is_valid_elem_type, Ts...> : std::bool_constant> { }; template struct num_elements> : std::integral_constant {}; template struct num_elements> : std::integral_constant {}; -template class OperationCurrentT, int... Indexes> -struct num_elements> +template +struct num_elements> : std::integral_constant {}; // Utilty trait for checking that the number of elements in T is in Ns. @@ -64,10 +58,8 @@ constexpr bool is_valid_size_v = is_valid_size::value; // Utility for converting a swizzle to a vector or preserve the type if it isn't // a swizzle. -template class OperationCurrentT, int... Indexes> -struct simplify_if_swizzle> { +template +struct simplify_if_swizzle> { using type = vec; }; @@ -83,10 +75,8 @@ template struct same_size_signed_int> { template struct same_size_signed_int> { using type = vec::type, N>; }; -template class OperationCurrentT, int... Indexes> -struct same_size_signed_int> { +template +struct same_size_signed_int> { // Converts to vec for simplicity. using type = vec::type, @@ -99,10 +89,8 @@ template struct same_size_unsigned_int> { template struct same_size_unsigned_int> { using type = vec::type, N>; }; -template class OperationCurrentT, int... Indexes> -struct same_size_unsigned_int> { +template +struct same_size_unsigned_int> { // Converts to vec for simplicity. using type = vec::type, @@ -122,12 +110,8 @@ template struct change_elements> { using type = vec::type, N>; }; -template class OperationCurrentT, - int... Indexes> -struct change_elements> { +template +struct change_elements> { // Converts to vec for simplicity. using type = vec::type, diff --git a/sycl/include/sycl/detail/named_swizzles_mixin.hpp b/sycl/include/sycl/detail/named_swizzles_mixin.hpp new file mode 100644 index 0000000000000..75fc13b47ce9c --- /dev/null +++ b/sycl/include/sycl/detail/named_swizzles_mixin.hpp @@ -0,0 +1,836 @@ +//==---------------- named_swizzles_mixin.hpp ------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +// This files implements two mixins +// `NamedSwizzlesMixinConst`/`NamedSwizzlesMixinBoth` that abstract away named +// swizzles implementation for SYCL vector and swizzles classes + +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +#ifndef SYCL_SIMPLE_SWIZZLES +#define __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES +#else +// TODO: It might be beneficial to use partial specializations for different Ns, +// instead of making all the named swizzles templates with SFINAE conditions. +#define __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES \ + /* __swizzled_vec__ XYZW_SWIZZLE() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N <= 4, xx, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xy, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xz, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xw, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yx, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yy, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yz, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yw, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zx, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zy, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zz, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zw, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wx, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wy, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wz, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ww, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N <= 4, xxx, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xxy, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxz, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxw, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyx, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyy, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyz, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyw, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzx, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzy, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzz, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzw, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwx, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwy, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwz, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xww, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxx, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxy, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxz, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxw, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyx, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyy, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyz, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyw, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzx, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzy, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzz, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzw, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywx, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywy, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywz, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yww, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxx, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxy, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxz, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxw, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyx, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyy, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyz, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyw, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzx, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzy, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzz, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzw, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwx, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwy, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwz, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zww, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxx, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxy, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxz, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxw, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyx, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyy, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyz, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyw, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzx, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzy, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzz, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzw, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwx, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwy, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwz, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, www, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N <= 4, xxxx, 0, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xxxy, 0, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxxz, 0, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxxw, 0, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xxyx, 0, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xxyy, 0, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxyz, 0, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxyw, 0, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxzx, 0, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxzy, 0, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xxzz, 0, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxzw, 0, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxwx, 0, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxwy, 0, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxwz, 0, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xxww, 0, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyxx, 0, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyxy, 0, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyxz, 0, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyxw, 0, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyyx, 0, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, xyyy, 0, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyyz, 0, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyyw, 0, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyzx, 0, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyzy, 0, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xyzz, 0, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyzw, 0, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xywx, 0, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xywy, 0, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xywz, 0, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xyww, 0, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzxx, 0, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzxy, 0, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzxz, 0, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzxw, 0, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzyx, 0, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzyy, 0, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzyz, 0, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzyw, 0, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzzx, 0, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzzy, 0, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, xzzz, 0, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzzw, 0, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzwx, 0, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzwy, 0, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzwz, 0, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xzww, 0, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwxx, 0, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwxy, 0, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwxz, 0, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwxw, 0, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwyx, 0, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwyy, 0, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwyz, 0, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwyw, 0, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwzx, 0, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwzy, 0, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwzz, 0, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwzw, 0, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwwx, 0, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwwy, 0, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwwz, 0, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, xwww, 0, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxxx, 1, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxxy, 1, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxxz, 1, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxxw, 1, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxyx, 1, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yxyy, 1, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxyz, 1, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxyw, 1, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxzx, 1, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxzy, 1, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yxzz, 1, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxzw, 1, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxwx, 1, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxwy, 1, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxwz, 1, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yxww, 1, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyxx, 1, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyxy, 1, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyxz, 1, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyxw, 1, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyyx, 1, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(2 <= N && N <= 4, yyyy, 1, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyyz, 1, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyyw, 1, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyzx, 1, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyzy, 1, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yyzz, 1, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyzw, 1, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yywx, 1, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yywy, 1, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yywz, 1, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yyww, 1, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzxx, 1, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzxy, 1, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzxz, 1, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzxw, 1, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzyx, 1, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzyy, 1, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzyz, 1, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzyw, 1, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzzx, 1, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzzy, 1, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, yzzz, 1, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzzw, 1, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzwx, 1, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzwy, 1, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzwz, 1, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, yzww, 1, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywxx, 1, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywxy, 1, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywxz, 1, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywxw, 1, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywyx, 1, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywyy, 1, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywyz, 1, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywyw, 1, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywzx, 1, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywzy, 1, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywzz, 1, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywzw, 1, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywwx, 1, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywwy, 1, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywwz, 1, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ywww, 1, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxxx, 2, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxxy, 2, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxxz, 2, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxxw, 2, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxyx, 2, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxyy, 2, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxyz, 2, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxyw, 2, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxzx, 2, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxzy, 2, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zxzz, 2, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxzw, 2, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxwx, 2, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxwy, 2, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxwz, 2, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zxww, 2, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyxx, 2, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyxy, 2, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyxz, 2, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyxw, 2, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyyx, 2, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyyy, 2, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyyz, 2, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyyw, 2, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyzx, 2, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyzy, 2, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zyzz, 2, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyzw, 2, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zywx, 2, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zywy, 2, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zywz, 2, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zyww, 2, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzxx, 2, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzxy, 2, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzxz, 2, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzxw, 2, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzyx, 2, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzyy, 2, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzyz, 2, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzyw, 2, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzzx, 2, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzzy, 2, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3 || N == 4, zzzz, 2, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzzw, 2, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzwx, 2, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzwy, 2, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzwz, 2, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zzww, 2, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwxx, 2, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwxy, 2, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwxz, 2, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwxw, 2, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwyx, 2, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwyy, 2, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwyz, 2, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwyw, 2, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwzx, 2, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwzy, 2, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwzz, 2, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwzw, 2, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwwx, 2, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwwy, 2, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwwz, 2, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, zwww, 2, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxxx, 3, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxxy, 3, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxxz, 3, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxxw, 3, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxyx, 3, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxyy, 3, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxyz, 3, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxyw, 3, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxzx, 3, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxzy, 3, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxzz, 3, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxzw, 3, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxwx, 3, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxwy, 3, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxwz, 3, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wxww, 3, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyxx, 3, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyxy, 3, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyxz, 3, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyxw, 3, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyyx, 3, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyyy, 3, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyyz, 3, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyyw, 3, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyzx, 3, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyzy, 3, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyzz, 3, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyzw, 3, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wywx, 3, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wywy, 3, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wywz, 3, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wyww, 3, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzxx, 3, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzxy, 3, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzxz, 3, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzxw, 3, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzyx, 3, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzyy, 3, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzyz, 3, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzyw, 3, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzzx, 3, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzzy, 3, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzzz, 3, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzzw, 3, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzwx, 3, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzwy, 3, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzwz, 3, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wzww, 3, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwxx, 3, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwxy, 3, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwxz, 3, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwxw, 3, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwyx, 3, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwyy, 3, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwyz, 3, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwyw, 3, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwzx, 3, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwzy, 3, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwzz, 3, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwzw, 3, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwwx, 3, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwwy, 3, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwwz, 3, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, wwww, 3, 3, 3, 3) \ + \ + /* __swizzled_vec__ RGBA_SWIZZLE() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rr, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rg, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rb, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ra, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gr, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gg, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gb, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ga, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, br, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bg, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bb, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ba, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ar, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ag, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ab, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aa, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrr, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrg, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrb, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rra, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgr, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgg, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgb, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rga, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbr, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbg, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbb, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rba, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rar, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rag, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rab, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raa, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grr, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grg, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grb, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gra, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggr, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggg, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggb, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gga, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbr, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbg, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbb, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gba, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gar, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gag, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gab, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaa, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brr, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brg, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brb, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bra, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgr, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgg, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgb, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bga, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbr, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbg, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbb, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bba, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bar, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bag, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bab, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baa, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arr, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arg, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arb, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ara, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agr, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agg, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agb, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aga, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abr, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abg, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abb, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aba, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aar, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aag, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aab, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaa, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrrr, 0, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrrg, 0, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrrb, 0, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrra, 0, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrgr, 0, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrgg, 0, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrgb, 0, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrga, 0, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrbr, 0, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrbg, 0, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrbb, 0, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrba, 0, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrar, 0, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrag, 0, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rrab, 0, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rraa, 0, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgrr, 0, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgrg, 0, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgrb, 0, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgra, 0, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rggr, 0, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rggg, 0, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rggb, 0, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgga, 0, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgbr, 0, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgbg, 0, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgbb, 0, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgba, 0, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgar, 0, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgag, 0, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgab, 0, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rgaa, 0, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbrr, 0, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbrg, 0, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbrb, 0, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbra, 0, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbgr, 0, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbgg, 0, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbgb, 0, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbga, 0, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbbr, 0, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbbg, 0, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbbb, 0, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbba, 0, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbar, 0, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbag, 0, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbab, 0, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rbaa, 0, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rarr, 0, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rarg, 0, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rarb, 0, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rara, 0, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ragr, 0, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ragg, 0, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ragb, 0, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raga, 0, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rabr, 0, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rabg, 0, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, rabb, 0, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raba, 0, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raar, 0, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raag, 0, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raab, 0, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, raaa, 0, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grrr, 1, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grrg, 1, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grrb, 1, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grra, 1, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grgr, 1, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grgg, 1, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grgb, 1, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grga, 1, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grbr, 1, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grbg, 1, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grbb, 1, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grba, 1, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grar, 1, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grag, 1, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, grab, 1, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, graa, 1, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggrr, 1, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggrg, 1, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggrb, 1, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggra, 1, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gggr, 1, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gggg, 1, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gggb, 1, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggga, 1, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggbr, 1, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggbg, 1, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggbb, 1, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggba, 1, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggar, 1, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggag, 1, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggab, 1, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, ggaa, 1, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbrr, 1, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbrg, 1, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbrb, 1, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbra, 1, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbgr, 1, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbgg, 1, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbgb, 1, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbga, 1, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbbr, 1, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbbg, 1, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbbb, 1, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbba, 1, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbar, 1, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbag, 1, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbab, 1, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gbaa, 1, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, garr, 1, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, garg, 1, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, garb, 1, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gara, 1, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gagr, 1, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gagg, 1, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gagb, 1, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaga, 1, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gabr, 1, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gabg, 1, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gabb, 1, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaba, 1, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaar, 1, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaag, 1, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaab, 1, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, gaaa, 1, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brrr, 2, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brrg, 2, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brrb, 2, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brra, 2, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brgr, 2, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brgg, 2, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brgb, 2, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brga, 2, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brbr, 2, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brbg, 2, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brbb, 2, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brba, 2, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brar, 2, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brag, 2, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, brab, 2, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, braa, 2, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgrr, 2, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgrg, 2, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgrb, 2, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgra, 2, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bggr, 2, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bggg, 2, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bggb, 2, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgga, 2, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgbr, 2, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgbg, 2, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgbb, 2, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgba, 2, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgar, 2, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgag, 2, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgab, 2, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bgaa, 2, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbrr, 2, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbrg, 2, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbrb, 2, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbra, 2, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbgr, 2, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbgg, 2, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbgb, 2, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbga, 2, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbbr, 2, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbbg, 2, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbbb, 2, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbba, 2, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbar, 2, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbag, 2, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbab, 2, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bbaa, 2, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, barr, 2, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, barg, 2, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, barb, 2, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bara, 2, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bagr, 2, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bagg, 2, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, bagb, 2, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baga, 2, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, babr, 2, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, babg, 2, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, babb, 2, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baba, 2, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baar, 2, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baag, 2, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baab, 2, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, baaa, 2, 3, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arrr, 3, 0, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arrg, 3, 0, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arrb, 3, 0, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arra, 3, 0, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, argr, 3, 0, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, argg, 3, 0, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, argb, 3, 0, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arga, 3, 0, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arbr, 3, 0, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arbg, 3, 0, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arbb, 3, 0, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arba, 3, 0, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arar, 3, 0, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arag, 3, 0, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, arab, 3, 0, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, araa, 3, 0, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agrr, 3, 1, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agrg, 3, 1, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agrb, 3, 1, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agra, 3, 1, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aggr, 3, 1, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aggg, 3, 1, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aggb, 3, 1, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agga, 3, 1, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agbr, 3, 1, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agbg, 3, 1, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agbb, 3, 1, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agba, 3, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agar, 3, 1, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agag, 3, 1, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agab, 3, 1, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, agaa, 3, 1, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abrr, 3, 2, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abrg, 3, 2, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abrb, 3, 2, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abra, 3, 2, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abgr, 3, 2, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abgg, 3, 2, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abgb, 3, 2, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abga, 3, 2, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abbr, 3, 2, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abbg, 3, 2, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abbb, 3, 2, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abba, 3, 2, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abar, 3, 2, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abag, 3, 2, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abab, 3, 2, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, abaa, 3, 2, 3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aarr, 3, 3, 0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aarg, 3, 3, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aarb, 3, 3, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aara, 3, 3, 0, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aagr, 3, 3, 1, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aagg, 3, 3, 1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aagb, 3, 3, 1, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaga, 3, 3, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aabr, 3, 3, 2, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aabg, 3, 3, 2, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aabb, 3, 3, 2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaba, 3, 3, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaar, 3, 3, 3, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaag, 3, 3, 3, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaab, 3, 3, 3, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, aaaa, 3, 3, 3, 3) +#endif + +// FIXME: One element swizzles must return a swizzle and not a scalar. However, +// old implementation didn't do that and we want to have that fix separately +// from other swizzle changes. To be addressed soon. + +#define __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES \ + /* __swizzled_vec__ XYZW_ACCESS() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N <= 4, x, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 2 || N == 3 || N == 4, y, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 3 || N == 4, z, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 4, w, 3) \ + \ + /* __swizzled_vec__ RGBA_ACCESS() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 4, r, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 4, g, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 4, b, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 4, a, 3) \ + \ + /* __swizzled_vec__ INDEX_ACCESS() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N > 0, s0, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N > 1, s1, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N > 2, s2, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N > 2, s3, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N > 4, s4, 4) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N > 4, s5, 5) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N > 4, s6, 6) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N > 4, s7, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 16, s8, 8) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 16, s9, 9) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 16, sA, 10) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 16, sB, 11) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 16, sC, 12) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 16, sD, 13) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 16, sE, 14) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(N == 16, sF, 15) \ + \ + /* __swizzled_vec__ lo()/hi() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, lo, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, lo, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, lo, 0, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, lo, 0, 1, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, lo, 0, 1, 2, 3, 4, 5, 6, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, hi, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, hi, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, hi, 2, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, hi, 4, 5, 6, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, hi, 8, 9, 10, 11, 12, 13, 14, 15) \ + /* __swizzled_vec__ odd()/even() const; */ \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, odd, 1) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, odd, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, odd, 1, 3) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, odd, 1, 3, 5, 7) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, odd, 1, 3, 5, 7, 9, 11, 13, 15) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 2, even, 0) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 3, even, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 4, even, 0, 2) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 8, even, 0, 2, 4, 6) \ + __SYCL_SWIZZLE_MIXIN_METHOD(N == 16, even, 0, 2, 4, 6, 8, 10, 12, 14) \ + /* SYCL_SIMPLE_SWIZZLES */ \ + __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES + +#define __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST(COND, NAME, ...) \ + template \ + std::enable_if_t< \ + (COND), decltype(std::declval().template swizzle<__VA_ARGS__>())> \ + NAME() { \ + return static_cast(this)->template swizzle<__VA_ARGS__>(); \ + } + +#define __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, ...) \ + template \ + std::enable_if_t<(COND), decltype(std::declval() \ + .template swizzle<__VA_ARGS__>())> \ + NAME() const { \ + return static_cast(this)->template swizzle<__VA_ARGS__>(); \ + } + +#define __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR_NON_CONST(COND, NAME, ...) \ + template < \ + int N = NumElements, typename Self_ = Self, \ + typename SwizzleResult = \ + decltype(std::declval().template swizzle<__VA_ARGS__>())> \ + std::enable_if_t<(COND), decltype(std::declval()[0])> \ + NAME() { \ + return static_cast(this)->template swizzle<__VA_ARGS__>()[0]; \ + } + +#define __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR_CONST(COND, NAME, ...) \ + template () \ + .template swizzle<__VA_ARGS__>())> \ + std::enable_if_t<(COND), decltype(std::declval()[0])> NAME() \ + const { \ + return static_cast(this) \ + ->template swizzle<__VA_ARGS__>()[0]; \ + } + +template struct NamedSwizzlesMixinConst { +#define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ + __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) +#define __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(COND, NAME, ...) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR_CONST(COND, NAME, __VA_ARGS__) + + __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES + +#undef __SYCL_SWIZZLE_MIXIN_METHOD +#undef __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR +}; + +template struct NamedSwizzlesMixinBoth { +#define __SYCL_SWIZZLE_MIXIN_METHOD(COND, NAME, ...) \ + __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST(COND, NAME, __VA_ARGS__) \ + __SYCL_SWIZZLE_MIXIN_METHOD_CONST(COND, NAME, __VA_ARGS__) +#define __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR(COND, NAME, ...) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR_NON_CONST(COND, NAME, __VA_ARGS__) \ + __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR_CONST(COND, NAME, __VA_ARGS__) + + __SYCL_SWIZZLE_MIXIN_ALL_SWIZZLES + +#undef __SYCL_SWIZZLE_MIXIN_METHOD +#undef __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR +}; + +#undef __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR_CONST +#undef __SYCL_SWIZZLE_MIXIN_METHOD_SCALAR_NON_CONST +#undef __SYCL_SWIZZLE_MIXIN_METHOD_CONST +#undef __SYCL_SWIZZLE_MIXIN_METHOD_NON_CONST + +#undef __SYCL_SWIZZLE_MIXIN_SIMPLE_SWIZZLES + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/type_traits.hpp b/sycl/include/sycl/detail/type_traits.hpp index 0f1d46ba5f731..34202006b7058 100644 --- a/sycl/include/sycl/detail/type_traits.hpp +++ b/sycl/include/sycl/detail/type_traits.hpp @@ -25,9 +25,7 @@ template struct is_fixed_size_group : std::false_type {}; template inline constexpr bool is_fixed_size_group_v = is_fixed_size_group::value; -template class OperationCurrentT, int... Indexes> -class SwizzleOp; +template class __SYCL_EBO Swizzle; } // namespace detail template class group; @@ -165,10 +163,8 @@ template struct get_elem_type_unqual> { template struct get_elem_type_unqual> { using type = T; }; -template class OperationCurrentT, int... Indexes> -struct get_elem_type_unqual> { +template +struct get_elem_type_unqual> { using type = typename get_elem_type_unqual>::type; }; @@ -249,10 +245,8 @@ template struct make_signed { template struct make_signed> { using type = vec, N>; }; -template class OperationCurrentT, int... Indexes> -struct make_signed> { +template +struct make_signed> { using type = make_signed_t>; }; template struct make_signed> { @@ -270,10 +264,8 @@ template struct make_unsigned { template struct make_unsigned> { using type = vec, N>; }; -template class OperationCurrentT, int... Indexes> -struct make_unsigned> { +template +struct make_unsigned> { using type = make_unsigned_t>; }; template struct make_unsigned> { @@ -300,10 +292,8 @@ template struct get_vec_size> { // is_swizzle template struct is_swizzle : std::false_type {}; -template class OperationCurrentT, int... Indexes> -struct is_swizzle> : std::true_type {}; +template +struct is_swizzle> : std::true_type {}; template constexpr bool is_swizzle_v = is_swizzle::value; diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp deleted file mode 100644 index bde6ce270afb5..0000000000000 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ /dev/null @@ -1,394 +0,0 @@ -//=== vector_arith.hpp --- Implementation of arithmetic ops on sycl::vec ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include // for half, cl_char, cl_int -#include // for is_sigeninteger, is_s... -#include // for is_contained -#include // for is_floating_point - -#include // bfloat16 - -#include -#include // for enable_if_t, is_same - -namespace sycl { -inline namespace _V1 { - -template class __SYCL_EBO vec; - -namespace detail { - -template class VecAccess; - -// Macros to populate binary operation on sycl::vec. -#if defined(__SYCL_BINOP) || defined(BINOP_BASE) -#error "Undefine __SYCL_BINOP and BINOP_BASE macro" -#endif - -#ifdef __SYCL_DEVICE_ONLY__ -#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const vec_t & Rhs) { \ - vec_t Ret; \ - if constexpr (vec_t::IsBfloat16) { \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] BINOP Rhs[I]; \ - } \ - } else { \ - auto ExtVecLhs = sycl::bit_cast(Lhs); \ - auto ExtVecRhs = sycl::bit_cast(Rhs); \ - Ret = vec(ExtVecLhs BINOP ExtVecRhs); \ - if constexpr (std::is_same_v && CONVERT) { \ - vec_arith_common::ConvertToDataT(Ret); \ - } \ - } \ - return Ret; \ - } -#else // __SYCL_DEVICE_ONLY__ - -#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const vec_t & Rhs) { \ - vec_t Ret{}; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] BINOP Rhs[I]; \ - } \ - return Ret; \ - } -#endif // __SYCL_DEVICE_ONLY__ - -#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT, COND) \ - BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const DataT & Rhs) { \ - return Lhs BINOP vec_t(Rhs); \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const DataT & Lhs, \ - const vec_t & Rhs) { \ - return vec_t(Lhs) BINOP Rhs; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> &operator OPASSIGN( \ - vec_t & Lhs, const vec_t & Rhs) { \ - Lhs = Lhs BINOP Rhs; \ - return Lhs; \ - } \ - template \ - friend std::enable_if_t<(Num != 1) && (COND), vec_t &> operator OPASSIGN( \ - vec_t & Lhs, const DataT & Rhs) { \ - Lhs = Lhs BINOP vec_t(Rhs); \ - return Lhs; \ - } - -/**************************************************************** - * vec_arith_common - * / | \ - * / | \ - * vec_arith vec_arith ... vec_arith - * \ | / - * \ | / - * sycl::vec - * - * vec_arith_common is the base class for vec_arith. It contains - * the common math operators of sycl::vec for all types. - * vec_arith is the derived class that contains the math operators - * specialized for certain types. sycl::vec inherits from vec_arith. - * *************************************************************/ -template class vec_arith_common; -template struct vec_helper; - -template -class vec_arith : public vec_arith_common { -protected: - using vec_t = vec; - using ocl_t = detail::select_cl_scalar_integral_signed_t; - template using vec_data = vec_helper; - - // operator!. - friend vec operator!(const vec_t &Rhs) { -#ifdef __SYCL_DEVICE_ONLY__ - if constexpr (!vec_t::IsBfloat16) { - auto extVec = sycl::bit_cast(Rhs); - vec Ret{ - (typename vec::vector_t) !extVec}; - return Ret; - } else -#endif // __SYCL_DEVICE_ONLY__ - { - vec Ret{}; - for (size_t I = 0; I < NumElements; ++I) { - // static_cast will work here as the output of ! operator is either 0 or - // -1. - Ret[I] = static_cast(-1 * (!Rhs[I])); - } - return Ret; - } - } - - // operator +. - friend vec_t operator+(const vec_t &Lhs) { -#ifdef __SYCL_DEVICE_ONLY__ - auto extVec = sycl::bit_cast(Lhs); - return vec_t{+extVec}; -#else - vec_t Ret{}; - for (size_t I = 0; I < NumElements; ++I) - Ret[I] = +Lhs[I]; - return Ret; -#endif - } - - // operator -. - friend vec_t operator-(const vec_t &Lhs) { - vec_t Ret{}; - if constexpr (vec_t::IsBfloat16) { - for (size_t I = 0; I < NumElements; I++) - Ret[I] = -Lhs[I]; - } else { -#ifndef __SYCL_DEVICE_ONLY__ - for (size_t I = 0; I < NumElements; ++I) - Ret[I] = -Lhs[I]; -#else - auto extVec = sycl::bit_cast(Lhs); - Ret = vec_t{-extVec}; - if constexpr (std::is_same_v) { - vec_arith_common::ConvertToDataT(Ret); - } -#endif - } - return Ret; - } - -// Unary operations on sycl::vec -// FIXME: Don't allow Unary operators on vec after -// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed. -#ifdef __SYCL_UOP -#error "Undefine __SYCL_UOP macro" -#endif -#define __SYCL_UOP(UOP, OPASSIGN) \ - friend vec_t &operator UOP(vec_t & Rhs) { \ - Rhs OPASSIGN DataT{1}; \ - return Rhs; \ - } \ - friend vec_t operator UOP(vec_t &Lhs, int) { \ - vec_t Ret(Lhs); \ - Lhs OPASSIGN DataT{1}; \ - return Ret; \ - } - - __SYCL_UOP(++, +=) - __SYCL_UOP(--, -=) -#undef __SYCL_UOP - - // The logical operations on scalar types results in 0/1, while for vec<>, - // logical operations should result in 0 and -1 (similar to OpenCL vectors). - // That's why, for vec, we need to invert the result of the logical - // operations since we store vec as scalar type on the device. -#if defined(__SYCL_RELLOGOP) || defined(RELLOGOP_BASE) -#error "Undefine __SYCL_RELLOGOP and RELLOGOP_BASE macro." -#endif - -#ifdef __SYCL_DEVICE_ONLY__ -#define RELLOGOP_BASE(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec_t & Lhs, const vec_t & Rhs) { \ - vec Ret{}; \ - /* ext_vector_type does not support bfloat16, so for these */ \ - /* we do element-by-element operation on the underlying std::array. */ \ - if constexpr (vec_t::IsBfloat16) { \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = static_cast(-(Lhs[I] RELLOGOP Rhs[I])); \ - } \ - } else { \ - auto ExtVecLhs = sycl::bit_cast(Lhs); \ - auto ExtVecRhs = sycl::bit_cast(Rhs); \ - /* Cast required to convert unsigned char ext_vec_type to */ \ - /* char ext_vec_type. */ \ - Ret = vec( \ - (typename vec::vector_t)( \ - ExtVecLhs RELLOGOP ExtVecRhs)); \ - /* For NumElements == 1, we use scalar instead of ext_vector_type. */ \ - if constexpr (NumElements == 1) { \ - Ret *= -1; \ - } \ - } \ - return Ret; \ - } -#else // __SYCL_DEVICE_ONLY__ -#define RELLOGOP_BASE(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec_t & Lhs, const vec_t & Rhs) { \ - vec Ret{}; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = static_cast(-(Lhs[I] RELLOGOP Rhs[I])); \ - } \ - return Ret; \ - } -#endif - -#define __SYCL_RELLOGOP(RELLOGOP, COND) \ - RELLOGOP_BASE(RELLOGOP, COND) \ - \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec_t & Lhs, const DataT & Rhs) { \ - return Lhs RELLOGOP vec_t(Rhs); \ - } \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const DataT & Lhs, const vec_t & Rhs) { \ - return vec_t(Lhs) RELLOGOP Rhs; \ - } - - // OP is: ==, !=, <, >, <=, >=, &&, || - // vec operatorOP(const vec &Rhs) const; - // vec operatorOP(const DataT &Rhs) const; - __SYCL_RELLOGOP(==, true) - __SYCL_RELLOGOP(!=, true) - __SYCL_RELLOGOP(>, true) - __SYCL_RELLOGOP(<, true) - __SYCL_RELLOGOP(>=, true) - __SYCL_RELLOGOP(<=, true) - - // Only available to integral types. - __SYCL_RELLOGOP(&&, (!detail::is_vgenfloat_v)) - __SYCL_RELLOGOP(||, (!detail::is_vgenfloat_v)) -#undef __SYCL_RELLOGOP -#undef RELLOGOP_BASE - - // Binary operations on sycl::vec<> for all types except std::byte. - __SYCL_BINOP(+, +=, true, true) - __SYCL_BINOP(-, -=, true, true) - __SYCL_BINOP(*, *=, false, true) - __SYCL_BINOP(/, /=, false, true) - - // The following OPs are available only when: DataT != cl_float && - // DataT != cl_double && DataT != cl_half && DataT != BF16. - __SYCL_BINOP(%, %=, false, (!detail::is_vgenfloat_v)) - // Bitwise operations are allowed for std::byte. - __SYCL_BINOP(|, |=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(&, &=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(^, ^=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(>>, >>=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(<<, <<=, true, (!detail::is_vgenfloat_v)) - - // friends - template friend class __SYCL_EBO vec; -}; // class vec_arith<> - -#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -template -class vec_arith - : public vec_arith_common { -protected: - // NumElements can never be zero. Still using the redundant check to avoid - // incomplete type errors. - using DataT = typename std::conditional_t; - using vec_t = vec; - template using vec_data = vec_helper; - - // Special <<, >> operators for std::byte. - // std::byte is not an arithmetic type and it only supports the following - // overloads of >> and << operators. - // - // 1 template - // constexpr std::byte operator<<( std::byte b, IntegerType shift ) - // noexcept; - friend vec_t operator<<(const vec_t &Lhs, int shift) { - vec_t Ret; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = Lhs[I] << shift; - } - return Ret; - } - friend vec_t &operator<<=(vec_t &Lhs, int shift) { - Lhs = Lhs << shift; - return Lhs; - } - - // 2 template - // constexpr std::byte operator>>( std::byte b, IntegerType shift ) - // noexcept; - friend vec_t operator>>(const vec_t &Lhs, int shift) { - vec_t Ret; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = Lhs[I] >> shift; - } - return Ret; - } - friend vec_t &operator>>=(vec_t &Lhs, int shift) { - Lhs = Lhs >> shift; - return Lhs; - } - - __SYCL_BINOP(|, |=, false, true) - __SYCL_BINOP(&, &=, false, true) - __SYCL_BINOP(^, ^=, false, true) - - // friends - template friend class __SYCL_EBO vec; -}; -#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - -template class vec_arith_common { -protected: - using vec_t = vec; - - static constexpr bool IsBfloat16 = - std::is_same_v; - - // operator~() available only when: dataT != float && dataT != double - // && dataT != half - template - friend std::enable_if_t, vec_t> - operator~(const vec_t &Rhs) { -#ifdef __SYCL_DEVICE_ONLY__ - auto extVec = sycl::bit_cast(Rhs); - vec_t Ret{~extVec}; - if constexpr (std::is_same_v) { - ConvertToDataT(Ret); - } - return Ret; -#else - vec_t Ret{}; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = ~Rhs[I]; - } - return Ret; -#endif - } - -#ifdef __SYCL_DEVICE_ONLY__ - using vec_bool_t = vec; - // Required only for std::bool. - static void ConvertToDataT(vec_bool_t &Ret) { - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = bit_cast(Ret[I]) != 0; - } - } -#endif - - // friends - template friend class __SYCL_EBO vec; -}; - -#undef __SYCL_BINOP -#undef BINOP_BASE - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/include/sycl/stream.hpp b/sycl/include/sycl/stream.hpp index f3c70098a3b18..0f873a641595c 100644 --- a/sycl/include/sycl/stream.hpp +++ b/sycl/include/sycl/stream.hpp @@ -30,7 +30,7 @@ #include // for property_list #include // for range #include // for multi_ptr -#include // for vec, SwizzleOp +#include // for vec, Swizzle #include // for size_t, byte #include // for hash, shared_ptr @@ -748,21 +748,17 @@ inline void writeHItem(GlobalBufAccessorT &GlobalFlushBuf, write(GlobalFlushBuf, FlushBufferSize, WIOffset, Buf, Len); } -template struct IsSwizzleOp : std::false_type {}; +template struct IsSwizzle : std::false_type {}; -template class OperationCurrentT, int... Indexes> -struct IsSwizzleOp> - : std::true_type { +template +struct IsSwizzle> : std::true_type { using T = typename VecT::element_type; using Type = typename sycl::vec; }; template using EnableIfSwizzleVec = - typename std::enable_if_t::value, - typename IsSwizzleOp::Type>; + typename std::enable_if_t::value, typename IsSwizzle::Type>; } // namespace detail diff --git a/sycl/include/sycl/swizzles.def b/sycl/include/sycl/swizzles.def deleted file mode 100644 index fd503e445d7fe..0000000000000 --- a/sycl/include/sycl/swizzles.def +++ /dev/null @@ -1,823 +0,0 @@ -//==---------------- swizzles.def --- SYCL types ---------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Included by types.hpp twice, once for vec<> and once for SwizzleOp<>. - -// TODO: exclude L-Value swizzle like vec.xxxx() - -#if defined(__SYCL_ACCESS) || defined(__SYCL_INDEXER) || \ - defined(__SYCL_EXPAND) || defined(__SYCL_NTH_ARG) || \ - defined(__SYCL_E0) || defined(__SYCL_E1) || defined(__SYCL_E2) || \ - defined(__SYCL_E3) || defined(__SYCL_E4) || defined(__SYCL_E5) || \ - defined(__SYCL_E6) || defined(__SYCL_E7) || defined(__SYCL_E8) -#error "Undefine __SYCL_{ACCESS, INDEXER, EXPAND, NTH_ARG, E[0-8]} macros." -#endif - -#define __SYCL_INDEXER(_X) Indexer<_X>::value - -// Accepts any number of args >= _N, but expands to just the _N-th one. -// Make N equal to the max number of args handled + 1. Here, N == 9. -#define __SYCL_NTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _N, ...) _N - -// Some compilers for such defines as '#define M1(_A, ...) M2(__VA_ARGS__)' and -// expression M1(1, 2, 3, 4) may pass (2, 3, 4) to M2 as one argument, -// not as three. To avoid that the following macro is needed. -// So, defining M1 as '#define M1(_A, ...) __SYCL_EXPAND( M2(__VA_ARGS__) )' -// solves the problem. -#define __SYCL_EXPAND(_X) _X - -// These are helper macros to create overrides based on the arity. -#define __SYCL_E0(_M, ...) -#define __SYCL_E1(_M, _X) _M(_X) -#define __SYCL_E2(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E1(_M, __VA_ARGS__)) -#define __SYCL_E3(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E2(_M, __VA_ARGS__)) -#define __SYCL_E4(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E3(_M, __VA_ARGS__)) -#define __SYCL_E5(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E4(_M, __VA_ARGS__)) -#define __SYCL_E6(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E5(_M, __VA_ARGS__)) -#define __SYCL_E7(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E6(_M, __VA_ARGS__)) -#define __SYCL_E8(_M, _X, ...) _M(_X), __SYCL_EXPAND(__SYCL_E7(_M, __VA_ARGS__)) - -// Creates template functions with required number of parameters. -// Each of the parameters X from __VA_ARGS__ is transformed into -// __SYCL_INDEXER(X). -// Currently the max number of parameters handled by this macro is 8. -#define __SYCL_ACCESS(_COND, _NAME, ...) \ - template \ - typename std::enable_if<(_COND), Swizzle< \ - __SYCL_EXPAND(__SYCL_NTH_ARG(__VA_ARGS__, \ - __SYCL_E8, __SYCL_E7, __SYCL_E6, __SYCL_E5, __SYCL_E4, \ - __SYCL_E3, __SYCL_E2, __SYCL_E1, __SYCL_E0) \ - (__SYCL_INDEXER, __VA_ARGS__))>>::type _NAME() { \ - return __SYCL_ACCESS_RETURN; \ - } \ - template \ - typename std::enable_if<(_COND), ConstSwizzle< \ - __SYCL_EXPAND(__SYCL_NTH_ARG(__VA_ARGS__, \ - __SYCL_E8, __SYCL_E7, __SYCL_E6, __SYCL_E5, __SYCL_E4, \ - __SYCL_E3, __SYCL_E2, __SYCL_E1, __SYCL_E0) \ - (__SYCL_INDEXER, __VA_ARGS__))>>::type _NAME() const { \ - return __SYCL_ACCESS_RETURN; \ - } - -#define __SYCL_SCALAR_ACCESS(_COND, _NAME, _INDEX) \ - template \ - typename std::enable_if<(_COND), DataT &>::type _NAME() { \ - return (*__SYCL_ACCESS_RETURN)[_INDEX]; \ - } \ - template \ - typename std::enable_if<(_COND), const DataT &>::type _NAME() const { \ - return (*__SYCL_ACCESS_RETURN)[_INDEX]; \ - } - -//__swizzled_vec__ XYZW_ACCESS() const; -__SYCL_SCALAR_ACCESS(N <= 4, x, 0) -__SYCL_SCALAR_ACCESS(N == 2 || N == 3 || N == 4, y, 1) -__SYCL_SCALAR_ACCESS(N == 3 || N == 4, z, 2) -__SYCL_SCALAR_ACCESS(N == 4, w, 3) - -//__swizzled_vec__ RGBA_ACCESS() const; -__SYCL_SCALAR_ACCESS(N == 4, r, 0) -__SYCL_SCALAR_ACCESS(N == 4, g, 1) -__SYCL_SCALAR_ACCESS(N == 4, b, 2) -__SYCL_SCALAR_ACCESS(N == 4, a, 3) - -//__swizzled_vec__ INDEX_ACCESS() const; -__SYCL_SCALAR_ACCESS(N > 0, s0, 0) -__SYCL_SCALAR_ACCESS(N > 1, s1, 1) -__SYCL_SCALAR_ACCESS(N > 2, s2, 2) -__SYCL_SCALAR_ACCESS(N > 2, s3, 3) -__SYCL_SCALAR_ACCESS(N > 4, s4, 4) -__SYCL_SCALAR_ACCESS(N > 4, s5, 5) -__SYCL_SCALAR_ACCESS(N > 4, s6, 6) -__SYCL_SCALAR_ACCESS(N > 4, s7, 7) -__SYCL_SCALAR_ACCESS(N == 16, s8, 8) -__SYCL_SCALAR_ACCESS(N == 16, s9, 9) -__SYCL_SCALAR_ACCESS(N == 16, sA, 10) -__SYCL_SCALAR_ACCESS(N == 16, sB, 11) -__SYCL_SCALAR_ACCESS(N == 16, sC, 12) -__SYCL_SCALAR_ACCESS(N == 16, sD, 13) -__SYCL_SCALAR_ACCESS(N == 16, sE, 14) -__SYCL_SCALAR_ACCESS(N == 16, sF, 15) - -#ifdef SYCL_SIMPLE_SWIZZLES -//__swizzled_vec__ XYZW_SWIZZLE() const; -__SYCL_ACCESS(N <= 4, xx, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xy, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xz, 0, 2) -__SYCL_ACCESS(N == 4, xw, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yx, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yy, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yz, 1, 2) -__SYCL_ACCESS(N == 4, yw, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zx, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zy, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zz, 2, 2) -__SYCL_ACCESS(N == 4, zw, 2, 3) -__SYCL_ACCESS(N == 4, wx, 3, 0) -__SYCL_ACCESS(N == 4, wy, 3, 1) -__SYCL_ACCESS(N == 4, wz, 3, 2) -__SYCL_ACCESS(N == 4, ww, 3, 3) -__SYCL_ACCESS(N <= 4, xxx, 0, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xxy, 0, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xxz, 0, 0, 2) -__SYCL_ACCESS(N == 4, xxw, 0, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, xyx, 0, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xyy, 0, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, xyz, 0, 1, 2) -__SYCL_ACCESS(N == 4, xyw, 0, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, xzx, 0, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, xzy, 0, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, xzz, 0, 2, 2) -__SYCL_ACCESS(N == 4, xzw, 0, 2, 3) -__SYCL_ACCESS(N == 4, xwx, 0, 3, 0) -__SYCL_ACCESS(N == 4, xwy, 0, 3, 1) -__SYCL_ACCESS(N == 4, xwz, 0, 3, 2) -__SYCL_ACCESS(N == 4, xww, 0, 3, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yxx, 1, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yxy, 1, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, yxz, 1, 0, 2) -__SYCL_ACCESS(N == 4, yxw, 1, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yyx, 1, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yyy, 1, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yyz, 1, 1, 2) -__SYCL_ACCESS(N == 4, yyw, 1, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, yzx, 1, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, yzy, 1, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, yzz, 1, 2, 2) -__SYCL_ACCESS(N == 4, yzw, 1, 2, 3) -__SYCL_ACCESS(N == 4, ywx, 1, 3, 0) -__SYCL_ACCESS(N == 4, ywy, 1, 3, 1) -__SYCL_ACCESS(N == 4, ywz, 1, 3, 2) -__SYCL_ACCESS(N == 4, yww, 1, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, zxx, 2, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, zxy, 2, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, zxz, 2, 0, 2) -__SYCL_ACCESS(N == 4, zxw, 2, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, zyx, 2, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, zyy, 2, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, zyz, 2, 1, 2) -__SYCL_ACCESS(N == 4, zyw, 2, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zzx, 2, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zzy, 2, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zzz, 2, 2, 2) -__SYCL_ACCESS(N == 4, zzw, 2, 2, 3) -__SYCL_ACCESS(N == 4, zwx, 2, 3, 0) -__SYCL_ACCESS(N == 4, zwy, 2, 3, 1) -__SYCL_ACCESS(N == 4, zwz, 2, 3, 2) -__SYCL_ACCESS(N == 4, zww, 2, 3, 3) -__SYCL_ACCESS(N == 4, wxx, 3, 0, 0) -__SYCL_ACCESS(N == 4, wxy, 3, 0, 1) -__SYCL_ACCESS(N == 4, wxz, 3, 0, 2) -__SYCL_ACCESS(N == 4, wxw, 3, 0, 3) -__SYCL_ACCESS(N == 4, wyx, 3, 1, 0) -__SYCL_ACCESS(N == 4, wyy, 3, 1, 1) -__SYCL_ACCESS(N == 4, wyz, 3, 1, 2) -__SYCL_ACCESS(N == 4, wyw, 3, 1, 3) -__SYCL_ACCESS(N == 4, wzx, 3, 2, 0) -__SYCL_ACCESS(N == 4, wzy, 3, 2, 1) -__SYCL_ACCESS(N == 4, wzz, 3, 2, 2) -__SYCL_ACCESS(N == 4, wzw, 3, 2, 3) -__SYCL_ACCESS(N == 4, wwx, 3, 3, 0) -__SYCL_ACCESS(N == 4, wwy, 3, 3, 1) -__SYCL_ACCESS(N == 4, wwz, 3, 3, 2) -__SYCL_ACCESS(N == 4, www, 3, 3, 3) -__SYCL_ACCESS(N <= 4, xxxx, 0, 0, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xxxy, 0, 0, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xxxz, 0, 0, 0, 2) -__SYCL_ACCESS(N == 4, xxxw, 0, 0, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, xxyx, 0, 0, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xxyy, 0, 0, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, xxyz, 0, 0, 1, 2) -__SYCL_ACCESS(N == 4, xxyw, 0, 0, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, xxzx, 0, 0, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, xxzy, 0, 0, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, xxzz, 0, 0, 2, 2) -__SYCL_ACCESS(N == 4, xxzw, 0, 0, 2, 3) -__SYCL_ACCESS(N == 4, xxwx, 0, 0, 3, 0) -__SYCL_ACCESS(N == 4, xxwy, 0, 0, 3, 1) -__SYCL_ACCESS(N == 4, xxwz, 0, 0, 3, 2) -__SYCL_ACCESS(N == 4, xxww, 0, 0, 3, 3) -__SYCL_ACCESS(2 <= N && N <= 4, xyxx, 0, 1, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xyxy, 0, 1, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xyxz, 0, 1, 0, 2) -__SYCL_ACCESS(N == 4, xyxw, 0, 1, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, xyyx, 0, 1, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, xyyy, 0, 1, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, xyyz, 0, 1, 1, 2) -__SYCL_ACCESS(N == 4, xyyw, 0, 1, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, xyzx, 0, 1, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, xyzy, 0, 1, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, xyzz, 0, 1, 2, 2) -__SYCL_ACCESS(N == 4, xyzw, 0, 1, 2, 3) -__SYCL_ACCESS(N == 4, xywx, 0, 1, 3, 0) -__SYCL_ACCESS(N == 4, xywy, 0, 1, 3, 1) -__SYCL_ACCESS(N == 4, xywz, 0, 1, 3, 2) -__SYCL_ACCESS(N == 4, xyww, 0, 1, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, xzxx, 0, 2, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, xzxy, 0, 2, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, xzxz, 0, 2, 0, 2) -__SYCL_ACCESS(N == 4, xzxw, 0, 2, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, xzyx, 0, 2, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, xzyy, 0, 2, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, xzyz, 0, 2, 1, 2) -__SYCL_ACCESS(N == 4, xzyw, 0, 2, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, xzzx, 0, 2, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, xzzy, 0, 2, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, xzzz, 0, 2, 2, 2) -__SYCL_ACCESS(N == 4, xzzw, 0, 2, 2, 3) -__SYCL_ACCESS(N == 4, xzwx, 0, 2, 3, 0) -__SYCL_ACCESS(N == 4, xzwy, 0, 2, 3, 1) -__SYCL_ACCESS(N == 4, xzwz, 0, 2, 3, 2) -__SYCL_ACCESS(N == 4, xzww, 0, 2, 3, 3) -__SYCL_ACCESS(N == 4, xwxx, 0, 3, 0, 0) -__SYCL_ACCESS(N == 4, xwxy, 0, 3, 0, 1) -__SYCL_ACCESS(N == 4, xwxz, 0, 3, 0, 2) -__SYCL_ACCESS(N == 4, xwxw, 0, 3, 0, 3) -__SYCL_ACCESS(N == 4, xwyx, 0, 3, 1, 0) -__SYCL_ACCESS(N == 4, xwyy, 0, 3, 1, 1) -__SYCL_ACCESS(N == 4, xwyz, 0, 3, 1, 2) -__SYCL_ACCESS(N == 4, xwyw, 0, 3, 1, 3) -__SYCL_ACCESS(N == 4, xwzx, 0, 3, 2, 0) -__SYCL_ACCESS(N == 4, xwzy, 0, 3, 2, 1) -__SYCL_ACCESS(N == 4, xwzz, 0, 3, 2, 2) -__SYCL_ACCESS(N == 4, xwzw, 0, 3, 2, 3) -__SYCL_ACCESS(N == 4, xwwx, 0, 3, 3, 0) -__SYCL_ACCESS(N == 4, xwwy, 0, 3, 3, 1) -__SYCL_ACCESS(N == 4, xwwz, 0, 3, 3, 2) -__SYCL_ACCESS(N == 4, xwww, 0, 3, 3, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yxxx, 1, 0, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yxxy, 1, 0, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, yxxz, 1, 0, 0, 2) -__SYCL_ACCESS(N == 4, yxxw, 1, 0, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yxyx, 1, 0, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yxyy, 1, 0, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yxyz, 1, 0, 1, 2) -__SYCL_ACCESS(N == 4, yxyw, 1, 0, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, yxzx, 1, 0, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, yxzy, 1, 0, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, yxzz, 1, 0, 2, 2) -__SYCL_ACCESS(N == 4, yxzw, 1, 0, 2, 3) -__SYCL_ACCESS(N == 4, yxwx, 1, 0, 3, 0) -__SYCL_ACCESS(N == 4, yxwy, 1, 0, 3, 1) -__SYCL_ACCESS(N == 4, yxwz, 1, 0, 3, 2) -__SYCL_ACCESS(N == 4, yxww, 1, 0, 3, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yyxx, 1, 1, 0, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yyxy, 1, 1, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, yyxz, 1, 1, 0, 2) -__SYCL_ACCESS(N == 4, yyxw, 1, 1, 0, 3) -__SYCL_ACCESS(2 <= N && N <= 4, yyyx, 1, 1, 1, 0) -__SYCL_ACCESS(2 <= N && N <= 4, yyyy, 1, 1, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yyyz, 1, 1, 1, 2) -__SYCL_ACCESS(N == 4, yyyw, 1, 1, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, yyzx, 1, 1, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, yyzy, 1, 1, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, yyzz, 1, 1, 2, 2) -__SYCL_ACCESS(N == 4, yyzw, 1, 1, 2, 3) -__SYCL_ACCESS(N == 4, yywx, 1, 1, 3, 0) -__SYCL_ACCESS(N == 4, yywy, 1, 1, 3, 1) -__SYCL_ACCESS(N == 4, yywz, 1, 1, 3, 2) -__SYCL_ACCESS(N == 4, yyww, 1, 1, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, yzxx, 1, 2, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, yzxy, 1, 2, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, yzxz, 1, 2, 0, 2) -__SYCL_ACCESS(N == 4, yzxw, 1, 2, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, yzyx, 1, 2, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, yzyy, 1, 2, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, yzyz, 1, 2, 1, 2) -__SYCL_ACCESS(N == 4, yzyw, 1, 2, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, yzzx, 1, 2, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, yzzy, 1, 2, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, yzzz, 1, 2, 2, 2) -__SYCL_ACCESS(N == 4, yzzw, 1, 2, 2, 3) -__SYCL_ACCESS(N == 4, yzwx, 1, 2, 3, 0) -__SYCL_ACCESS(N == 4, yzwy, 1, 2, 3, 1) -__SYCL_ACCESS(N == 4, yzwz, 1, 2, 3, 2) -__SYCL_ACCESS(N == 4, yzww, 1, 2, 3, 3) -__SYCL_ACCESS(N == 4, ywxx, 1, 3, 0, 0) -__SYCL_ACCESS(N == 4, ywxy, 1, 3, 0, 1) -__SYCL_ACCESS(N == 4, ywxz, 1, 3, 0, 2) -__SYCL_ACCESS(N == 4, ywxw, 1, 3, 0, 3) -__SYCL_ACCESS(N == 4, ywyx, 1, 3, 1, 0) -__SYCL_ACCESS(N == 4, ywyy, 1, 3, 1, 1) -__SYCL_ACCESS(N == 4, ywyz, 1, 3, 1, 2) -__SYCL_ACCESS(N == 4, ywyw, 1, 3, 1, 3) -__SYCL_ACCESS(N == 4, ywzx, 1, 3, 2, 0) -__SYCL_ACCESS(N == 4, ywzy, 1, 3, 2, 1) -__SYCL_ACCESS(N == 4, ywzz, 1, 3, 2, 2) -__SYCL_ACCESS(N == 4, ywzw, 1, 3, 2, 3) -__SYCL_ACCESS(N == 4, ywwx, 1, 3, 3, 0) -__SYCL_ACCESS(N == 4, ywwy, 1, 3, 3, 1) -__SYCL_ACCESS(N == 4, ywwz, 1, 3, 3, 2) -__SYCL_ACCESS(N == 4, ywww, 1, 3, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, zxxx, 2, 0, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, zxxy, 2, 0, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, zxxz, 2, 0, 0, 2) -__SYCL_ACCESS(N == 4, zxxw, 2, 0, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, zxyx, 2, 0, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, zxyy, 2, 0, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, zxyz, 2, 0, 1, 2) -__SYCL_ACCESS(N == 4, zxyw, 2, 0, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zxzx, 2, 0, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zxzy, 2, 0, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zxzz, 2, 0, 2, 2) -__SYCL_ACCESS(N == 4, zxzw, 2, 0, 2, 3) -__SYCL_ACCESS(N == 4, zxwx, 2, 0, 3, 0) -__SYCL_ACCESS(N == 4, zxwy, 2, 0, 3, 1) -__SYCL_ACCESS(N == 4, zxwz, 2, 0, 3, 2) -__SYCL_ACCESS(N == 4, zxww, 2, 0, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, zyxx, 2, 1, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, zyxy, 2, 1, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, zyxz, 2, 1, 0, 2) -__SYCL_ACCESS(N == 4, zyxw, 2, 1, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, zyyx, 2, 1, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, zyyy, 2, 1, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, zyyz, 2, 1, 1, 2) -__SYCL_ACCESS(N == 4, zyyw, 2, 1, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zyzx, 2, 1, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zyzy, 2, 1, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zyzz, 2, 1, 2, 2) -__SYCL_ACCESS(N == 4, zyzw, 2, 1, 2, 3) -__SYCL_ACCESS(N == 4, zywx, 2, 1, 3, 0) -__SYCL_ACCESS(N == 4, zywy, 2, 1, 3, 1) -__SYCL_ACCESS(N == 4, zywz, 2, 1, 3, 2) -__SYCL_ACCESS(N == 4, zyww, 2, 1, 3, 3) -__SYCL_ACCESS(N == 3 || N == 4, zzxx, 2, 2, 0, 0) -__SYCL_ACCESS(N == 3 || N == 4, zzxy, 2, 2, 0, 1) -__SYCL_ACCESS(N == 3 || N == 4, zzxz, 2, 2, 0, 2) -__SYCL_ACCESS(N == 4, zzxw, 2, 2, 0, 3) -__SYCL_ACCESS(N == 3 || N == 4, zzyx, 2, 2, 1, 0) -__SYCL_ACCESS(N == 3 || N == 4, zzyy, 2, 2, 1, 1) -__SYCL_ACCESS(N == 3 || N == 4, zzyz, 2, 2, 1, 2) -__SYCL_ACCESS(N == 4, zzyw, 2, 2, 1, 3) -__SYCL_ACCESS(N == 3 || N == 4, zzzx, 2, 2, 2, 0) -__SYCL_ACCESS(N == 3 || N == 4, zzzy, 2, 2, 2, 1) -__SYCL_ACCESS(N == 3 || N == 4, zzzz, 2, 2, 2, 2) -__SYCL_ACCESS(N == 4, zzzw, 2, 2, 2, 3) -__SYCL_ACCESS(N == 4, zzwx, 2, 2, 3, 0) -__SYCL_ACCESS(N == 4, zzwy, 2, 2, 3, 1) -__SYCL_ACCESS(N == 4, zzwz, 2, 2, 3, 2) -__SYCL_ACCESS(N == 4, zzww, 2, 2, 3, 3) -__SYCL_ACCESS(N == 4, zwxx, 2, 3, 0, 0) -__SYCL_ACCESS(N == 4, zwxy, 2, 3, 0, 1) -__SYCL_ACCESS(N == 4, zwxz, 2, 3, 0, 2) -__SYCL_ACCESS(N == 4, zwxw, 2, 3, 0, 3) -__SYCL_ACCESS(N == 4, zwyx, 2, 3, 1, 0) -__SYCL_ACCESS(N == 4, zwyy, 2, 3, 1, 1) -__SYCL_ACCESS(N == 4, zwyz, 2, 3, 1, 2) -__SYCL_ACCESS(N == 4, zwyw, 2, 3, 1, 3) -__SYCL_ACCESS(N == 4, zwzx, 2, 3, 2, 0) -__SYCL_ACCESS(N == 4, zwzy, 2, 3, 2, 1) -__SYCL_ACCESS(N == 4, zwzz, 2, 3, 2, 2) -__SYCL_ACCESS(N == 4, zwzw, 2, 3, 2, 3) -__SYCL_ACCESS(N == 4, zwwx, 2, 3, 3, 0) -__SYCL_ACCESS(N == 4, zwwy, 2, 3, 3, 1) -__SYCL_ACCESS(N == 4, zwwz, 2, 3, 3, 2) -__SYCL_ACCESS(N == 4, zwww, 2, 3, 3, 3) -__SYCL_ACCESS(N == 4, wxxx, 3, 0, 0, 0) -__SYCL_ACCESS(N == 4, wxxy, 3, 0, 0, 1) -__SYCL_ACCESS(N == 4, wxxz, 3, 0, 0, 2) -__SYCL_ACCESS(N == 4, wxxw, 3, 0, 0, 3) -__SYCL_ACCESS(N == 4, wxyx, 3, 0, 1, 0) -__SYCL_ACCESS(N == 4, wxyy, 3, 0, 1, 1) -__SYCL_ACCESS(N == 4, wxyz, 3, 0, 1, 2) -__SYCL_ACCESS(N == 4, wxyw, 3, 0, 1, 3) -__SYCL_ACCESS(N == 4, wxzx, 3, 0, 2, 0) -__SYCL_ACCESS(N == 4, wxzy, 3, 0, 2, 1) -__SYCL_ACCESS(N == 4, wxzz, 3, 0, 2, 2) -__SYCL_ACCESS(N == 4, wxzw, 3, 0, 2, 3) -__SYCL_ACCESS(N == 4, wxwx, 3, 0, 3, 0) -__SYCL_ACCESS(N == 4, wxwy, 3, 0, 3, 1) -__SYCL_ACCESS(N == 4, wxwz, 3, 0, 3, 2) -__SYCL_ACCESS(N == 4, wxww, 3, 0, 3, 3) -__SYCL_ACCESS(N == 4, wyxx, 3, 1, 0, 0) -__SYCL_ACCESS(N == 4, wyxy, 3, 1, 0, 1) -__SYCL_ACCESS(N == 4, wyxz, 3, 1, 0, 2) -__SYCL_ACCESS(N == 4, wyxw, 3, 1, 0, 3) -__SYCL_ACCESS(N == 4, wyyx, 3, 1, 1, 0) -__SYCL_ACCESS(N == 4, wyyy, 3, 1, 1, 1) -__SYCL_ACCESS(N == 4, wyyz, 3, 1, 1, 2) -__SYCL_ACCESS(N == 4, wyyw, 3, 1, 1, 3) -__SYCL_ACCESS(N == 4, wyzx, 3, 1, 2, 0) -__SYCL_ACCESS(N == 4, wyzy, 3, 1, 2, 1) -__SYCL_ACCESS(N == 4, wyzz, 3, 1, 2, 2) -__SYCL_ACCESS(N == 4, wyzw, 3, 1, 2, 3) -__SYCL_ACCESS(N == 4, wywx, 3, 1, 3, 0) -__SYCL_ACCESS(N == 4, wywy, 3, 1, 3, 1) -__SYCL_ACCESS(N == 4, wywz, 3, 1, 3, 2) -__SYCL_ACCESS(N == 4, wyww, 3, 1, 3, 3) -__SYCL_ACCESS(N == 4, wzxx, 3, 2, 0, 0) -__SYCL_ACCESS(N == 4, wzxy, 3, 2, 0, 1) -__SYCL_ACCESS(N == 4, wzxz, 3, 2, 0, 2) -__SYCL_ACCESS(N == 4, wzxw, 3, 2, 0, 3) -__SYCL_ACCESS(N == 4, wzyx, 3, 2, 1, 0) -__SYCL_ACCESS(N == 4, wzyy, 3, 2, 1, 1) -__SYCL_ACCESS(N == 4, wzyz, 3, 2, 1, 2) -__SYCL_ACCESS(N == 4, wzyw, 3, 2, 1, 3) -__SYCL_ACCESS(N == 4, wzzx, 3, 2, 2, 0) -__SYCL_ACCESS(N == 4, wzzy, 3, 2, 2, 1) -__SYCL_ACCESS(N == 4, wzzz, 3, 2, 2, 2) -__SYCL_ACCESS(N == 4, wzzw, 3, 2, 2, 3) -__SYCL_ACCESS(N == 4, wzwx, 3, 2, 3, 0) -__SYCL_ACCESS(N == 4, wzwy, 3, 2, 3, 1) -__SYCL_ACCESS(N == 4, wzwz, 3, 2, 3, 2) -__SYCL_ACCESS(N == 4, wzww, 3, 2, 3, 3) -__SYCL_ACCESS(N == 4, wwxx, 3, 3, 0, 0) -__SYCL_ACCESS(N == 4, wwxy, 3, 3, 0, 1) -__SYCL_ACCESS(N == 4, wwxz, 3, 3, 0, 2) -__SYCL_ACCESS(N == 4, wwxw, 3, 3, 0, 3) -__SYCL_ACCESS(N == 4, wwyx, 3, 3, 1, 0) -__SYCL_ACCESS(N == 4, wwyy, 3, 3, 1, 1) -__SYCL_ACCESS(N == 4, wwyz, 3, 3, 1, 2) -__SYCL_ACCESS(N == 4, wwyw, 3, 3, 1, 3) -__SYCL_ACCESS(N == 4, wwzx, 3, 3, 2, 0) -__SYCL_ACCESS(N == 4, wwzy, 3, 3, 2, 1) -__SYCL_ACCESS(N == 4, wwzz, 3, 3, 2, 2) -__SYCL_ACCESS(N == 4, wwzw, 3, 3, 2, 3) -__SYCL_ACCESS(N == 4, wwwx, 3, 3, 3, 0) -__SYCL_ACCESS(N == 4, wwwy, 3, 3, 3, 1) -__SYCL_ACCESS(N == 4, wwwz, 3, 3, 3, 2) -__SYCL_ACCESS(N == 4, wwww, 3, 3, 3, 3) - -//__swizzled_vec__ RGBA_SWIZZLE() const; -__SYCL_ACCESS(N == 4, rr, 0, 0) -__SYCL_ACCESS(N == 4, rg, 0, 1) -__SYCL_ACCESS(N == 4, rb, 0, 2) -__SYCL_ACCESS(N == 4, ra, 0, 3) -__SYCL_ACCESS(N == 4, gr, 1, 0) -__SYCL_ACCESS(N == 4, gg, 1, 1) -__SYCL_ACCESS(N == 4, gb, 1, 2) -__SYCL_ACCESS(N == 4, ga, 1, 3) -__SYCL_ACCESS(N == 4, br, 2, 0) -__SYCL_ACCESS(N == 4, bg, 2, 1) -__SYCL_ACCESS(N == 4, bb, 2, 2) -__SYCL_ACCESS(N == 4, ba, 2, 3) -__SYCL_ACCESS(N == 4, ar, 3, 0) -__SYCL_ACCESS(N == 4, ag, 3, 1) -__SYCL_ACCESS(N == 4, ab, 3, 2) -__SYCL_ACCESS(N == 4, aa, 3, 3) -__SYCL_ACCESS(N == 4, rrr, 0, 0, 0) -__SYCL_ACCESS(N == 4, rrg, 0, 0, 1) -__SYCL_ACCESS(N == 4, rrb, 0, 0, 2) -__SYCL_ACCESS(N == 4, rra, 0, 0, 3) -__SYCL_ACCESS(N == 4, rgr, 0, 1, 0) -__SYCL_ACCESS(N == 4, rgg, 0, 1, 1) -__SYCL_ACCESS(N == 4, rgb, 0, 1, 2) -__SYCL_ACCESS(N == 4, rga, 0, 1, 3) -__SYCL_ACCESS(N == 4, rbr, 0, 2, 0) -__SYCL_ACCESS(N == 4, rbg, 0, 2, 1) -__SYCL_ACCESS(N == 4, rbb, 0, 2, 2) -__SYCL_ACCESS(N == 4, rba, 0, 2, 3) -__SYCL_ACCESS(N == 4, rar, 0, 3, 0) -__SYCL_ACCESS(N == 4, rag, 0, 3, 1) -__SYCL_ACCESS(N == 4, rab, 0, 3, 2) -__SYCL_ACCESS(N == 4, raa, 0, 3, 3) -__SYCL_ACCESS(N == 4, grr, 1, 0, 0) -__SYCL_ACCESS(N == 4, grg, 1, 0, 1) -__SYCL_ACCESS(N == 4, grb, 1, 0, 2) -__SYCL_ACCESS(N == 4, gra, 1, 0, 3) -__SYCL_ACCESS(N == 4, ggr, 1, 1, 0) -__SYCL_ACCESS(N == 4, ggg, 1, 1, 1) -__SYCL_ACCESS(N == 4, ggb, 1, 1, 2) -__SYCL_ACCESS(N == 4, gga, 1, 1, 3) -__SYCL_ACCESS(N == 4, gbr, 1, 2, 0) -__SYCL_ACCESS(N == 4, gbg, 1, 2, 1) -__SYCL_ACCESS(N == 4, gbb, 1, 2, 2) -__SYCL_ACCESS(N == 4, gba, 1, 2, 3) -__SYCL_ACCESS(N == 4, gar, 1, 3, 0) -__SYCL_ACCESS(N == 4, gag, 1, 3, 1) -__SYCL_ACCESS(N == 4, gab, 1, 3, 2) -__SYCL_ACCESS(N == 4, gaa, 1, 3, 3) -__SYCL_ACCESS(N == 4, brr, 2, 0, 0) -__SYCL_ACCESS(N == 4, brg, 2, 0, 1) -__SYCL_ACCESS(N == 4, brb, 2, 0, 2) -__SYCL_ACCESS(N == 4, bra, 2, 0, 3) -__SYCL_ACCESS(N == 4, bgr, 2, 1, 0) -__SYCL_ACCESS(N == 4, bgg, 2, 1, 1) -__SYCL_ACCESS(N == 4, bgb, 2, 1, 2) -__SYCL_ACCESS(N == 4, bga, 2, 1, 3) -__SYCL_ACCESS(N == 4, bbr, 2, 2, 0) -__SYCL_ACCESS(N == 4, bbg, 2, 2, 1) -__SYCL_ACCESS(N == 4, bbb, 2, 2, 2) -__SYCL_ACCESS(N == 4, bba, 2, 2, 3) -__SYCL_ACCESS(N == 4, bar, 2, 3, 0) -__SYCL_ACCESS(N == 4, bag, 2, 3, 1) -__SYCL_ACCESS(N == 4, bab, 2, 3, 2) -__SYCL_ACCESS(N == 4, baa, 2, 3, 3) -__SYCL_ACCESS(N == 4, arr, 3, 0, 0) -__SYCL_ACCESS(N == 4, arg, 3, 0, 1) -__SYCL_ACCESS(N == 4, arb, 3, 0, 2) -__SYCL_ACCESS(N == 4, ara, 3, 0, 3) -__SYCL_ACCESS(N == 4, agr, 3, 1, 0) -__SYCL_ACCESS(N == 4, agg, 3, 1, 1) -__SYCL_ACCESS(N == 4, agb, 3, 1, 2) -__SYCL_ACCESS(N == 4, aga, 3, 1, 3) -__SYCL_ACCESS(N == 4, abr, 3, 2, 0) -__SYCL_ACCESS(N == 4, abg, 3, 2, 1) -__SYCL_ACCESS(N == 4, abb, 3, 2, 2) -__SYCL_ACCESS(N == 4, aba, 3, 2, 3) -__SYCL_ACCESS(N == 4, aar, 3, 3, 0) -__SYCL_ACCESS(N == 4, aag, 3, 3, 1) -__SYCL_ACCESS(N == 4, aab, 3, 3, 2) -__SYCL_ACCESS(N == 4, aaa, 3, 3, 3) -__SYCL_ACCESS(N == 4, rrrr, 0, 0, 0, 0) -__SYCL_ACCESS(N == 4, rrrg, 0, 0, 0, 1) -__SYCL_ACCESS(N == 4, rrrb, 0, 0, 0, 2) -__SYCL_ACCESS(N == 4, rrra, 0, 0, 0, 3) -__SYCL_ACCESS(N == 4, rrgr, 0, 0, 1, 0) -__SYCL_ACCESS(N == 4, rrgg, 0, 0, 1, 1) -__SYCL_ACCESS(N == 4, rrgb, 0, 0, 1, 2) -__SYCL_ACCESS(N == 4, rrga, 0, 0, 1, 3) -__SYCL_ACCESS(N == 4, rrbr, 0, 0, 2, 0) -__SYCL_ACCESS(N == 4, rrbg, 0, 0, 2, 1) -__SYCL_ACCESS(N == 4, rrbb, 0, 0, 2, 2) -__SYCL_ACCESS(N == 4, rrba, 0, 0, 2, 3) -__SYCL_ACCESS(N == 4, rrar, 0, 0, 3, 0) -__SYCL_ACCESS(N == 4, rrag, 0, 0, 3, 1) -__SYCL_ACCESS(N == 4, rrab, 0, 0, 3, 2) -__SYCL_ACCESS(N == 4, rraa, 0, 0, 3, 3) -__SYCL_ACCESS(N == 4, rgrr, 0, 1, 0, 0) -__SYCL_ACCESS(N == 4, rgrg, 0, 1, 0, 1) -__SYCL_ACCESS(N == 4, rgrb, 0, 1, 0, 2) -__SYCL_ACCESS(N == 4, rgra, 0, 1, 0, 3) -__SYCL_ACCESS(N == 4, rggr, 0, 1, 1, 0) -__SYCL_ACCESS(N == 4, rggg, 0, 1, 1, 1) -__SYCL_ACCESS(N == 4, rggb, 0, 1, 1, 2) -__SYCL_ACCESS(N == 4, rgga, 0, 1, 1, 3) -__SYCL_ACCESS(N == 4, rgbr, 0, 1, 2, 0) -__SYCL_ACCESS(N == 4, rgbg, 0, 1, 2, 1) -__SYCL_ACCESS(N == 4, rgbb, 0, 1, 2, 2) -__SYCL_ACCESS(N == 4, rgba, 0, 1, 2, 3) -__SYCL_ACCESS(N == 4, rgar, 0, 1, 3, 0) -__SYCL_ACCESS(N == 4, rgag, 0, 1, 3, 1) -__SYCL_ACCESS(N == 4, rgab, 0, 1, 3, 2) -__SYCL_ACCESS(N == 4, rgaa, 0, 1, 3, 3) -__SYCL_ACCESS(N == 4, rbrr, 0, 2, 0, 0) -__SYCL_ACCESS(N == 4, rbrg, 0, 2, 0, 1) -__SYCL_ACCESS(N == 4, rbrb, 0, 2, 0, 2) -__SYCL_ACCESS(N == 4, rbra, 0, 2, 0, 3) -__SYCL_ACCESS(N == 4, rbgr, 0, 2, 1, 0) -__SYCL_ACCESS(N == 4, rbgg, 0, 2, 1, 1) -__SYCL_ACCESS(N == 4, rbgb, 0, 2, 1, 2) -__SYCL_ACCESS(N == 4, rbga, 0, 2, 1, 3) -__SYCL_ACCESS(N == 4, rbbr, 0, 2, 2, 0) -__SYCL_ACCESS(N == 4, rbbg, 0, 2, 2, 1) -__SYCL_ACCESS(N == 4, rbbb, 0, 2, 2, 2) -__SYCL_ACCESS(N == 4, rbba, 0, 2, 2, 3) -__SYCL_ACCESS(N == 4, rbar, 0, 2, 3, 0) -__SYCL_ACCESS(N == 4, rbag, 0, 2, 3, 1) -__SYCL_ACCESS(N == 4, rbab, 0, 2, 3, 2) -__SYCL_ACCESS(N == 4, rbaa, 0, 2, 3, 3) -__SYCL_ACCESS(N == 4, rarr, 0, 3, 0, 0) -__SYCL_ACCESS(N == 4, rarg, 0, 3, 0, 1) -__SYCL_ACCESS(N == 4, rarb, 0, 3, 0, 2) -__SYCL_ACCESS(N == 4, rara, 0, 3, 0, 3) -__SYCL_ACCESS(N == 4, ragr, 0, 3, 1, 0) -__SYCL_ACCESS(N == 4, ragg, 0, 3, 1, 1) -__SYCL_ACCESS(N == 4, ragb, 0, 3, 1, 2) -__SYCL_ACCESS(N == 4, raga, 0, 3, 1, 3) -__SYCL_ACCESS(N == 4, rabr, 0, 3, 2, 0) -__SYCL_ACCESS(N == 4, rabg, 0, 3, 2, 1) -__SYCL_ACCESS(N == 4, rabb, 0, 3, 2, 2) -__SYCL_ACCESS(N == 4, raba, 0, 3, 2, 3) -__SYCL_ACCESS(N == 4, raar, 0, 3, 3, 0) -__SYCL_ACCESS(N == 4, raag, 0, 3, 3, 1) -__SYCL_ACCESS(N == 4, raab, 0, 3, 3, 2) -__SYCL_ACCESS(N == 4, raaa, 0, 3, 3, 3) -__SYCL_ACCESS(N == 4, grrr, 1, 0, 0, 0) -__SYCL_ACCESS(N == 4, grrg, 1, 0, 0, 1) -__SYCL_ACCESS(N == 4, grrb, 1, 0, 0, 2) -__SYCL_ACCESS(N == 4, grra, 1, 0, 0, 3) -__SYCL_ACCESS(N == 4, grgr, 1, 0, 1, 0) -__SYCL_ACCESS(N == 4, grgg, 1, 0, 1, 1) -__SYCL_ACCESS(N == 4, grgb, 1, 0, 1, 2) -__SYCL_ACCESS(N == 4, grga, 1, 0, 1, 3) -__SYCL_ACCESS(N == 4, grbr, 1, 0, 2, 0) -__SYCL_ACCESS(N == 4, grbg, 1, 0, 2, 1) -__SYCL_ACCESS(N == 4, grbb, 1, 0, 2, 2) -__SYCL_ACCESS(N == 4, grba, 1, 0, 2, 3) -__SYCL_ACCESS(N == 4, grar, 1, 0, 3, 0) -__SYCL_ACCESS(N == 4, grag, 1, 0, 3, 1) -__SYCL_ACCESS(N == 4, grab, 1, 0, 3, 2) -__SYCL_ACCESS(N == 4, graa, 1, 0, 3, 3) -__SYCL_ACCESS(N == 4, ggrr, 1, 1, 0, 0) -__SYCL_ACCESS(N == 4, ggrg, 1, 1, 0, 1) -__SYCL_ACCESS(N == 4, ggrb, 1, 1, 0, 2) -__SYCL_ACCESS(N == 4, ggra, 1, 1, 0, 3) -__SYCL_ACCESS(N == 4, gggr, 1, 1, 1, 0) -__SYCL_ACCESS(N == 4, gggg, 1, 1, 1, 1) -__SYCL_ACCESS(N == 4, gggb, 1, 1, 1, 2) -__SYCL_ACCESS(N == 4, ggga, 1, 1, 1, 3) -__SYCL_ACCESS(N == 4, ggbr, 1, 1, 2, 0) -__SYCL_ACCESS(N == 4, ggbg, 1, 1, 2, 1) -__SYCL_ACCESS(N == 4, ggbb, 1, 1, 2, 2) -__SYCL_ACCESS(N == 4, ggba, 1, 1, 2, 3) -__SYCL_ACCESS(N == 4, ggar, 1, 1, 3, 0) -__SYCL_ACCESS(N == 4, ggag, 1, 1, 3, 1) -__SYCL_ACCESS(N == 4, ggab, 1, 1, 3, 2) -__SYCL_ACCESS(N == 4, ggaa, 1, 1, 3, 3) -__SYCL_ACCESS(N == 4, gbrr, 1, 2, 0, 0) -__SYCL_ACCESS(N == 4, gbrg, 1, 2, 0, 1) -__SYCL_ACCESS(N == 4, gbrb, 1, 2, 0, 2) -__SYCL_ACCESS(N == 4, gbra, 1, 2, 0, 3) -__SYCL_ACCESS(N == 4, gbgr, 1, 2, 1, 0) -__SYCL_ACCESS(N == 4, gbgg, 1, 2, 1, 1) -__SYCL_ACCESS(N == 4, gbgb, 1, 2, 1, 2) -__SYCL_ACCESS(N == 4, gbga, 1, 2, 1, 3) -__SYCL_ACCESS(N == 4, gbbr, 1, 2, 2, 0) -__SYCL_ACCESS(N == 4, gbbg, 1, 2, 2, 1) -__SYCL_ACCESS(N == 4, gbbb, 1, 2, 2, 2) -__SYCL_ACCESS(N == 4, gbba, 1, 2, 2, 3) -__SYCL_ACCESS(N == 4, gbar, 1, 2, 3, 0) -__SYCL_ACCESS(N == 4, gbag, 1, 2, 3, 1) -__SYCL_ACCESS(N == 4, gbab, 1, 2, 3, 2) -__SYCL_ACCESS(N == 4, gbaa, 1, 2, 3, 3) -__SYCL_ACCESS(N == 4, garr, 1, 3, 0, 0) -__SYCL_ACCESS(N == 4, garg, 1, 3, 0, 1) -__SYCL_ACCESS(N == 4, garb, 1, 3, 0, 2) -__SYCL_ACCESS(N == 4, gara, 1, 3, 0, 3) -__SYCL_ACCESS(N == 4, gagr, 1, 3, 1, 0) -__SYCL_ACCESS(N == 4, gagg, 1, 3, 1, 1) -__SYCL_ACCESS(N == 4, gagb, 1, 3, 1, 2) -__SYCL_ACCESS(N == 4, gaga, 1, 3, 1, 3) -__SYCL_ACCESS(N == 4, gabr, 1, 3, 2, 0) -__SYCL_ACCESS(N == 4, gabg, 1, 3, 2, 1) -__SYCL_ACCESS(N == 4, gabb, 1, 3, 2, 2) -__SYCL_ACCESS(N == 4, gaba, 1, 3, 2, 3) -__SYCL_ACCESS(N == 4, gaar, 1, 3, 3, 0) -__SYCL_ACCESS(N == 4, gaag, 1, 3, 3, 1) -__SYCL_ACCESS(N == 4, gaab, 1, 3, 3, 2) -__SYCL_ACCESS(N == 4, gaaa, 1, 3, 3, 3) -__SYCL_ACCESS(N == 4, brrr, 2, 0, 0, 0) -__SYCL_ACCESS(N == 4, brrg, 2, 0, 0, 1) -__SYCL_ACCESS(N == 4, brrb, 2, 0, 0, 2) -__SYCL_ACCESS(N == 4, brra, 2, 0, 0, 3) -__SYCL_ACCESS(N == 4, brgr, 2, 0, 1, 0) -__SYCL_ACCESS(N == 4, brgg, 2, 0, 1, 1) -__SYCL_ACCESS(N == 4, brgb, 2, 0, 1, 2) -__SYCL_ACCESS(N == 4, brga, 2, 0, 1, 3) -__SYCL_ACCESS(N == 4, brbr, 2, 0, 2, 0) -__SYCL_ACCESS(N == 4, brbg, 2, 0, 2, 1) -__SYCL_ACCESS(N == 4, brbb, 2, 0, 2, 2) -__SYCL_ACCESS(N == 4, brba, 2, 0, 2, 3) -__SYCL_ACCESS(N == 4, brar, 2, 0, 3, 0) -__SYCL_ACCESS(N == 4, brag, 2, 0, 3, 1) -__SYCL_ACCESS(N == 4, brab, 2, 0, 3, 2) -__SYCL_ACCESS(N == 4, braa, 2, 0, 3, 3) -__SYCL_ACCESS(N == 4, bgrr, 2, 1, 0, 0) -__SYCL_ACCESS(N == 4, bgrg, 2, 1, 0, 1) -__SYCL_ACCESS(N == 4, bgrb, 2, 1, 0, 2) -__SYCL_ACCESS(N == 4, bgra, 2, 1, 0, 3) -__SYCL_ACCESS(N == 4, bggr, 2, 1, 1, 0) -__SYCL_ACCESS(N == 4, bggg, 2, 1, 1, 1) -__SYCL_ACCESS(N == 4, bggb, 2, 1, 1, 2) -__SYCL_ACCESS(N == 4, bgga, 2, 1, 1, 3) -__SYCL_ACCESS(N == 4, bgbr, 2, 1, 2, 0) -__SYCL_ACCESS(N == 4, bgbg, 2, 1, 2, 1) -__SYCL_ACCESS(N == 4, bgbb, 2, 1, 2, 2) -__SYCL_ACCESS(N == 4, bgba, 2, 1, 2, 3) -__SYCL_ACCESS(N == 4, bgar, 2, 1, 3, 0) -__SYCL_ACCESS(N == 4, bgag, 2, 1, 3, 1) -__SYCL_ACCESS(N == 4, bgab, 2, 1, 3, 2) -__SYCL_ACCESS(N == 4, bgaa, 2, 1, 3, 3) -__SYCL_ACCESS(N == 4, bbrr, 2, 2, 0, 0) -__SYCL_ACCESS(N == 4, bbrg, 2, 2, 0, 1) -__SYCL_ACCESS(N == 4, bbrb, 2, 2, 0, 2) -__SYCL_ACCESS(N == 4, bbra, 2, 2, 0, 3) -__SYCL_ACCESS(N == 4, bbgr, 2, 2, 1, 0) -__SYCL_ACCESS(N == 4, bbgg, 2, 2, 1, 1) -__SYCL_ACCESS(N == 4, bbgb, 2, 2, 1, 2) -__SYCL_ACCESS(N == 4, bbga, 2, 2, 1, 3) -__SYCL_ACCESS(N == 4, bbbr, 2, 2, 2, 0) -__SYCL_ACCESS(N == 4, bbbg, 2, 2, 2, 1) -__SYCL_ACCESS(N == 4, bbbb, 2, 2, 2, 2) -__SYCL_ACCESS(N == 4, bbba, 2, 2, 2, 3) -__SYCL_ACCESS(N == 4, bbar, 2, 2, 3, 0) -__SYCL_ACCESS(N == 4, bbag, 2, 2, 3, 1) -__SYCL_ACCESS(N == 4, bbab, 2, 2, 3, 2) -__SYCL_ACCESS(N == 4, bbaa, 2, 2, 3, 3) -__SYCL_ACCESS(N == 4, barr, 2, 3, 0, 0) -__SYCL_ACCESS(N == 4, barg, 2, 3, 0, 1) -__SYCL_ACCESS(N == 4, barb, 2, 3, 0, 2) -__SYCL_ACCESS(N == 4, bara, 2, 3, 0, 3) -__SYCL_ACCESS(N == 4, bagr, 2, 3, 1, 0) -__SYCL_ACCESS(N == 4, bagg, 2, 3, 1, 1) -__SYCL_ACCESS(N == 4, bagb, 2, 3, 1, 2) -__SYCL_ACCESS(N == 4, baga, 2, 3, 1, 3) -__SYCL_ACCESS(N == 4, babr, 2, 3, 2, 0) -__SYCL_ACCESS(N == 4, babg, 2, 3, 2, 1) -__SYCL_ACCESS(N == 4, babb, 2, 3, 2, 2) -__SYCL_ACCESS(N == 4, baba, 2, 3, 2, 3) -__SYCL_ACCESS(N == 4, baar, 2, 3, 3, 0) -__SYCL_ACCESS(N == 4, baag, 2, 3, 3, 1) -__SYCL_ACCESS(N == 4, baab, 2, 3, 3, 2) -__SYCL_ACCESS(N == 4, baaa, 2, 3, 3, 3) -__SYCL_ACCESS(N == 4, arrr, 3, 0, 0, 0) -__SYCL_ACCESS(N == 4, arrg, 3, 0, 0, 1) -__SYCL_ACCESS(N == 4, arrb, 3, 0, 0, 2) -__SYCL_ACCESS(N == 4, arra, 3, 0, 0, 3) -__SYCL_ACCESS(N == 4, argr, 3, 0, 1, 0) -__SYCL_ACCESS(N == 4, argg, 3, 0, 1, 1) -__SYCL_ACCESS(N == 4, argb, 3, 0, 1, 2) -__SYCL_ACCESS(N == 4, arga, 3, 0, 1, 3) -__SYCL_ACCESS(N == 4, arbr, 3, 0, 2, 0) -__SYCL_ACCESS(N == 4, arbg, 3, 0, 2, 1) -__SYCL_ACCESS(N == 4, arbb, 3, 0, 2, 2) -__SYCL_ACCESS(N == 4, arba, 3, 0, 2, 3) -__SYCL_ACCESS(N == 4, arar, 3, 0, 3, 0) -__SYCL_ACCESS(N == 4, arag, 3, 0, 3, 1) -__SYCL_ACCESS(N == 4, arab, 3, 0, 3, 2) -__SYCL_ACCESS(N == 4, araa, 3, 0, 3, 3) -__SYCL_ACCESS(N == 4, agrr, 3, 1, 0, 0) -__SYCL_ACCESS(N == 4, agrg, 3, 1, 0, 1) -__SYCL_ACCESS(N == 4, agrb, 3, 1, 0, 2) -__SYCL_ACCESS(N == 4, agra, 3, 1, 0, 3) -__SYCL_ACCESS(N == 4, aggr, 3, 1, 1, 0) -__SYCL_ACCESS(N == 4, aggg, 3, 1, 1, 1) -__SYCL_ACCESS(N == 4, aggb, 3, 1, 1, 2) -__SYCL_ACCESS(N == 4, agga, 3, 1, 1, 3) -__SYCL_ACCESS(N == 4, agbr, 3, 1, 2, 0) -__SYCL_ACCESS(N == 4, agbg, 3, 1, 2, 1) -__SYCL_ACCESS(N == 4, agbb, 3, 1, 2, 2) -__SYCL_ACCESS(N == 4, agba, 3, 1, 2, 3) -__SYCL_ACCESS(N == 4, agar, 3, 1, 3, 0) -__SYCL_ACCESS(N == 4, agag, 3, 1, 3, 1) -__SYCL_ACCESS(N == 4, agab, 3, 1, 3, 2) -__SYCL_ACCESS(N == 4, agaa, 3, 1, 3, 3) -__SYCL_ACCESS(N == 4, abrr, 3, 2, 0, 0) -__SYCL_ACCESS(N == 4, abrg, 3, 2, 0, 1) -__SYCL_ACCESS(N == 4, abrb, 3, 2, 0, 2) -__SYCL_ACCESS(N == 4, abra, 3, 2, 0, 3) -__SYCL_ACCESS(N == 4, abgr, 3, 2, 1, 0) -__SYCL_ACCESS(N == 4, abgg, 3, 2, 1, 1) -__SYCL_ACCESS(N == 4, abgb, 3, 2, 1, 2) -__SYCL_ACCESS(N == 4, abga, 3, 2, 1, 3) -__SYCL_ACCESS(N == 4, abbr, 3, 2, 2, 0) -__SYCL_ACCESS(N == 4, abbg, 3, 2, 2, 1) -__SYCL_ACCESS(N == 4, abbb, 3, 2, 2, 2) -__SYCL_ACCESS(N == 4, abba, 3, 2, 2, 3) -__SYCL_ACCESS(N == 4, abar, 3, 2, 3, 0) -__SYCL_ACCESS(N == 4, abag, 3, 2, 3, 1) -__SYCL_ACCESS(N == 4, abab, 3, 2, 3, 2) -__SYCL_ACCESS(N == 4, abaa, 3, 2, 3, 3) -__SYCL_ACCESS(N == 4, aarr, 3, 3, 0, 0) -__SYCL_ACCESS(N == 4, aarg, 3, 3, 0, 1) -__SYCL_ACCESS(N == 4, aarb, 3, 3, 0, 2) -__SYCL_ACCESS(N == 4, aara, 3, 3, 0, 3) -__SYCL_ACCESS(N == 4, aagr, 3, 3, 1, 0) -__SYCL_ACCESS(N == 4, aagg, 3, 3, 1, 1) -__SYCL_ACCESS(N == 4, aagb, 3, 3, 1, 2) -__SYCL_ACCESS(N == 4, aaga, 3, 3, 1, 3) -__SYCL_ACCESS(N == 4, aabr, 3, 3, 2, 0) -__SYCL_ACCESS(N == 4, aabg, 3, 3, 2, 1) -__SYCL_ACCESS(N == 4, aabb, 3, 3, 2, 2) -__SYCL_ACCESS(N == 4, aaba, 3, 3, 2, 3) -__SYCL_ACCESS(N == 4, aaar, 3, 3, 3, 0) -__SYCL_ACCESS(N == 4, aaag, 3, 3, 3, 1) -__SYCL_ACCESS(N == 4, aaab, 3, 3, 3, 2) -__SYCL_ACCESS(N == 4, aaaa, 3, 3, 3, 3) - -#endif // #ifdef SYCL_SIMPLE_SWIZZLES - -//__swizzled_vec__ lo()/hi() const; -__SYCL_ACCESS(N == 2, lo, 0) -__SYCL_ACCESS(N == 3, lo, 0, 1) -__SYCL_ACCESS(N == 4, lo, 0, 1) -__SYCL_ACCESS(N == 8, lo, 0, 1, 2, 3) -__SYCL_ACCESS(N == 16, lo, 0, 1, 2, 3, 4, 5, 6, 7) -__SYCL_ACCESS(N == 2, hi, 1) -__SYCL_ACCESS(N == 3, hi, 2, 3) -__SYCL_ACCESS(N == 4, hi, 2, 3) -__SYCL_ACCESS(N == 8, hi, 4, 5, 6, 7) -__SYCL_ACCESS(N == 16, hi, 8, 9, 10, 11, 12, 13, 14, 15) -//__swizzled_vec__ odd()/even() const; -__SYCL_ACCESS(N == 2, odd, 1) -__SYCL_ACCESS(N == 3, odd, 1, 3) -__SYCL_ACCESS(N == 4, odd, 1, 3) -__SYCL_ACCESS(N == 8, odd, 1, 3, 5, 7) -__SYCL_ACCESS(N == 16, odd, 1, 3, 5, 7, 9, 11, 13, 15) -__SYCL_ACCESS(N == 2, even, 0) -__SYCL_ACCESS(N == 3, even, 0, 2) -__SYCL_ACCESS(N == 4, even, 0, 2) -__SYCL_ACCESS(N == 8, even, 0, 2, 4, 6) -__SYCL_ACCESS(N == 16, even, 0, 2, 4, 6, 8, 10, 12, 14) - -#undef __SYCL_E0 -#undef __SYCL_E1 -#undef __SYCL_E2 -#undef __SYCL_E3 -#undef __SYCL_E4 -#undef __SYCL_E5 -#undef __SYCL_E6 -#undef __SYCL_E7 -#undef __SYCL_E8 - -#undef __SYCL_NTH_ARG -#undef __SYCL_EXPAND -#undef __SYCL_INDEXER -#undef __SYCL_ACCESS -#undef __SYCL_SCALAR_ACCESS diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index ea935032ba445..7e54017f8329c 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -33,12 +33,12 @@ #include // for vector_basic_list #include // for is_sigeninteger, is_s... #include // for memcpy -#include // for is_contained -#include // for is_floating_point -#include -#include // for convertImpl -#include // for vector_alignment -#include // for StorageT, half, Vec16... +#include +#include // for is_contained +#include // for is_floating_point +#include // for convertImpl +#include // for vector_alignment +#include // for StorageT, half, Vec16... #include // bfloat16 @@ -84,19 +84,6 @@ struct elem { }; namespace detail { -template class OperationCurrentT, int... Indexes> -class SwizzleOp; - -// Special type indicating that SwizzleOp should just read value from vector - -// not trying to perform any operations. Should not be called. -template class GetOp { -public: - using DataT = T; - DataT getValue(size_t) const { return (DataT)0; } - DataT operator()(DataT, DataT) { return (DataT)0; } -}; - // Templated vs. non-templated conversion operator behaves differently when two // conversions are needed as in the case below: // @@ -111,14 +98,676 @@ template class GetOp { // // must go throw `v.x()` returning a swizzle, then its `operator==` returning // vec and we want that code to compile. -template +template struct ScalarConversionOperatorMixIn {}; -template -struct ScalarConversionOperatorMixIn> { - operator T() const { return (*static_cast(this))[0]; } +template +struct ScalarConversionOperatorMixIn> { + operator T() const { return (*static_cast(this))[0]; } +}; + +// Everything could have been much easier if we had C++20 concepts, then all the +// operators could be provided in a single mixin class with proper `requires` +// clauses on each overload. Until then, we have to have at least a separate +// mixing for each requirement (e.g. not byte, neither byte nor fp, not fp, +// etc.). Grouping like that would also be somewhat confusing, so we just create +// a separate mixin for each overload/narrow set of overloads and just "merge" +// them all back later. + +template +struct IncDecMixin {}; + +template +struct IncDecMixin>> { + friend SelfOperandTy &operator++(SelfOperandTy &x) { + x += DataT{1}; + return x; + } + friend SelfOperandTy &operator--(SelfOperandTy &x) { + x -= DataT{1}; + return x; + } +}; + +template +struct IncDecMixin>> + : public IncDecMixin { + friend auto operator++(SelfOperandTy &x, int) { + auto tmp = +x; + x += DataT{1}; + return tmp; + } + friend auto operator--(SelfOperandTy &x, int) { + auto tmp = +x; + x -= DataT{1}; + return tmp; + } +}; + +// TODO: The specification doesn't mention this specifically, but that's what +// the implementation has been doing and it seems to be a reasonable thing to +// do. Otherwise shift operators for byte element type would have to be disabled +// completely to follow C++ standard approach. +template +struct ByteShiftsMixin {}; + +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) +template +struct ByteShiftsMixin>> { + friend auto operator<<(const Self &lhs, int shift) { + vec tmp; + for (int i = 0; i < N; ++i) + tmp[i] = lhs[i] << shift; + return tmp; + } + friend auto operator>>(const Self &lhs, int shift) { + vec tmp; + for (int i = 0; i < N; ++i) + tmp[i] = lhs[i] >> shift; + return tmp; + } +}; + +template +struct ByteShiftsMixin>> + : public ByteShiftsMixin { + friend OpAssignSelfOperandTy &operator<<=(OpAssignSelfOperandTy &lhs, + int shift) { + lhs = lhs << shift; + return lhs; + } + friend OpAssignSelfOperandTy &operator>>=(OpAssignSelfOperandTy &lhs, + int shift) { + lhs = lhs >> shift; + return lhs; + } +}; +#endif + +// We use std::plus and similar to "map" template parameter to an +// overloaded operator. These three below are missing from ``. +struct ShiftLeft { + template + constexpr auto operator()(T &&lhs, U &&rhs) const + -> decltype(std::forward(lhs) << std::forward(rhs)) { + return std::forward(lhs) << std::forward(rhs); + } +}; +struct ShiftRight { + template + constexpr auto operator()(T &&lhs, + U &&rhs) const -> decltype(std::forward(lhs) >> + std::forward(rhs)) { + return std::forward(lhs) >> std::forward(rhs); + } +}; + +struct UnaryPlus { + template + constexpr auto operator()(T &&arg) const -> decltype(+std::forward(arg)) { + return +std::forward(arg); + } +}; + +template +static constexpr bool not_fp = + !std::is_same_v && !std::is_same_v && + !std::is_same_v; + +// To provide information about operators availability depending on vec/swizzle +// element type. +template +inline constexpr bool is_op_available = false; + +#define __SYCL_OP_AVAILABILITY(OP, COND) \ + template inline constexpr bool is_op_available = COND; + +// clang-format off +__SYCL_OP_AVAILABILITY(std::plus , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::minus , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::multiplies , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::divides , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::modulus , !detail::is_byte_v && not_fp) + +__SYCL_OP_AVAILABILITY(std::bit_and , not_fp) +__SYCL_OP_AVAILABILITY(std::bit_or , not_fp) +__SYCL_OP_AVAILABILITY(std::bit_xor , not_fp) + +__SYCL_OP_AVAILABILITY(std::equal_to , true) +__SYCL_OP_AVAILABILITY(std::not_equal_to , true) +__SYCL_OP_AVAILABILITY(std::less , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::greater , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::less_equal , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::greater_equal , !detail::is_byte_v) + +__SYCL_OP_AVAILABILITY(std::logical_and , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::logical_or , !detail::is_byte_v) + +__SYCL_OP_AVAILABILITY(ShiftLeft , !detail::is_byte_v && not_fp) +__SYCL_OP_AVAILABILITY(ShiftRight , !detail::is_byte_v && not_fp) + +// Unary +__SYCL_OP_AVAILABILITY(std::negate , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::logical_not , !detail::is_byte_v) +__SYCL_OP_AVAILABILITY(std::bit_not , not_fp) +__SYCL_OP_AVAILABILITY(UnaryPlus , !detail::is_byte_v) +// clang-format on + +#undef __SYCL_OP_AVAILABILITY + +// clang-format off +#define __SYCL_PROCESS_BINARY_OPS(PROCESS_OP, DELIMITER) \ + PROCESS_OP(std::plus) \ +DELIMITER PROCESS_OP(std::minus) \ +DELIMITER PROCESS_OP(std::multiplies) \ +DELIMITER PROCESS_OP(std::divides) \ +DELIMITER PROCESS_OP(std::modulus) \ +DELIMITER PROCESS_OP(std::bit_and) \ +DELIMITER PROCESS_OP(std::bit_or) \ +DELIMITER PROCESS_OP(std::bit_xor) \ +DELIMITER PROCESS_OP(std::equal_to) \ +DELIMITER PROCESS_OP(std::not_equal_to) \ +DELIMITER PROCESS_OP(std::less) \ +DELIMITER PROCESS_OP(std::greater) \ +DELIMITER PROCESS_OP(std::less_equal) \ +DELIMITER PROCESS_OP(std::greater_equal) \ +DELIMITER PROCESS_OP(std::logical_and) \ +DELIMITER PROCESS_OP(std::logical_or) \ +DELIMITER PROCESS_OP(ShiftLeft) \ +DELIMITER PROCESS_OP(ShiftRight) + +#define __SYCL_PROCESS_BINARY_OPASSIGN_OPS(PROCESS_OP, DELIMITER) \ + PROCESS_OP(std::plus) \ +DELIMITER PROCESS_OP(std::minus) \ +DELIMITER PROCESS_OP(std::multiplies) \ +DELIMITER PROCESS_OP(std::divides) \ +DELIMITER PROCESS_OP(std::modulus) \ +DELIMITER PROCESS_OP(std::bit_and) \ +DELIMITER PROCESS_OP(std::bit_or) \ +DELIMITER PROCESS_OP(std::bit_xor) \ +DELIMITER PROCESS_OP(ShiftLeft) \ +DELIMITER PROCESS_OP(ShiftRight) + +#define __SYCL_PROCESS_UNARY_OPS(PROCESS_OP, DELIMITER) \ + PROCESS_OP(std::negate) \ +DELIMITER PROCESS_OP(std::logical_not) \ +DELIMITER PROCESS_OP(std::bit_not) \ +DELIMITER PROCESS_OP(UnaryPlus) +// clang-format on + +// Need to separate binop/opassign because const vec swizzles don't have the +// latter. +template +struct NonTemplateBinaryOpMixin {}; +template +struct NonTemplateBinaryOpAssignMixin {}; + +template class __SYCL_EBO Swizzle; + +template +struct SwizzleTemplateBinaryOpMixin {}; +template +struct SwizzleTemplateBinaryOpAssignMixin {}; + +#define __SYCL_BINARY_OP_MIXIN(OP, BINOP) \ + template \ + struct NonTemplateBinaryOpMixin< \ + Lhs, Rhs, Impl, DataT, OP, \ + std::enable_if_t>> { \ + friend auto operator BINOP(const Lhs &lhs, const Rhs &rhs) { \ + return Impl{}(lhs, rhs, OP{}); \ + } \ + }; \ + template \ + struct SwizzleTemplateBinaryOpMixin< \ + Self, VecT, DataT, N, OP, \ + std::enable_if_t>> { \ + template && \ + N == sizeof...(OtherIndexes)>> \ + friend auto \ + operator BINOP(const Self &lhs, \ + const Swizzle &rhs) { \ + using ResultVec = vec; \ + return OP{}(static_cast(lhs), static_cast(rhs)); \ + } \ + template && \ + N == sizeof...(OtherIndexes) && \ + std::is_const_v != std::is_const_v>> \ + friend auto operator BINOP(const Swizzle &lhs, \ + const Self &rhs) { \ + using ResultVec = vec; \ + return OP{}(static_cast(lhs), static_cast(rhs)); \ + } \ + }; + +#define __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(OP, BINOP, OPASSIGN) \ + __SYCL_BINARY_OP_MIXIN(OP, BINOP) \ + template \ + struct NonTemplateBinaryOpAssignMixin< \ + Lhs, Rhs, DataT, OP, std::enable_if_t>> { \ + friend Lhs &operator OPASSIGN(Lhs & lhs, const Rhs & rhs) { \ + lhs = OP{}(lhs, rhs); \ + return lhs; \ + } \ + }; \ + template \ + struct SwizzleTemplateBinaryOpAssignMixin< \ + Self, VecT, DataT, N, OP, \ + std::enable_if_t>> { \ + template && \ + N == sizeof...(OtherIndexes)>> \ + friend const Self & \ + operator OPASSIGN(const Self & lhs, \ + const Swizzle &rhs) { \ + using ResultVec = vec; \ + lhs = OP{}(static_cast(lhs), static_cast(rhs)); \ + return lhs; \ + } \ + template && \ + N == sizeof...(OtherIndexes) && \ + std::is_const_v != std::is_const_v>> \ + friend auto \ + operator OPASSIGN(const Swizzle &lhs, \ + const Self &rhs) { \ + using ResultVec = vec; \ + lhs = OP{}(static_cast(lhs), static_cast(rhs)); \ + return lhs; \ + } \ + }; + +template +struct UnaryOpMixin {}; + +#define __SYCL_UNARY_OP_MIXIN(OP, UOP) \ + template \ + struct UnaryOpMixin>> { \ + friend auto operator UOP(const T &x) { return Impl{}(x, OP{}); } \ + }; + +// clang-format off + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::plus , +, +=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::minus , -, -=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::multiplies , *, *=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::divides , /, /=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::modulus , %, %=) + + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::bit_and , &, &=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::bit_or , |, |=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(std::bit_xor , ^, ^=) + + __SYCL_BINARY_OP_MIXIN(std::equal_to , ==) + __SYCL_BINARY_OP_MIXIN(std::not_equal_to , !=) + __SYCL_BINARY_OP_MIXIN(std::less , <) + __SYCL_BINARY_OP_MIXIN(std::greater , >) + __SYCL_BINARY_OP_MIXIN(std::less_equal , <=) + __SYCL_BINARY_OP_MIXIN(std::greater_equal , >=) + + __SYCL_BINARY_OP_MIXIN(std::logical_and , &&) + __SYCL_BINARY_OP_MIXIN(std::logical_or , ||) + + // TODO: versions for std::byte + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(ShiftLeft , <<, <<=) + __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN(ShiftRight , >>, >>=) + + __SYCL_UNARY_OP_MIXIN(std::negate , -) + __SYCL_UNARY_OP_MIXIN(std::logical_not , !) + __SYCL_UNARY_OP_MIXIN(std::bit_not , ~) + __SYCL_UNARY_OP_MIXIN(UnaryPlus , +) +// clang-format on + +#undef __SYCL_OP_MIXIN +#undef __SYCL_BINARY_OP_AND_OPASSIGN_MIXIN +#undef __SYCL_BINARY_OP_MIXIN + +#define __SYCL_COMMA , + +// clang-format off +#define __SYCL_MIXIN_FOR_BINARY(OP) \ +public NonTemplateBinaryOpMixin + +#define __SYCL_MIXIN_FOR_BINARY_OPASSIGN(OP) \ +public NonTemplateBinaryOpAssignMixin + +#define __SYCL_MIXIN_FOR_TEMPLATE_BINARY(OP) \ +public SwizzleTemplateBinaryOpMixin + +#define __SYCL_MIXIN_FOR_TEMPLATE_BINARY_OPASSIGN(OP) \ +public SwizzleTemplateBinaryOpAssignMixin + +#define __SYCL_MIXIN_FOR_UNARY(OP) \ +public UnaryOpMixin +// clang-format on + +template +struct __SYCL_EBO NonTemplateBinaryOpsMixin + : __SYCL_PROCESS_BINARY_OPS(__SYCL_MIXIN_FOR_BINARY, __SYCL_COMMA) {}; + +template +struct __SYCL_EBO NonTemplateBinaryOpAssignOpsMixin + : __SYCL_PROCESS_BINARY_OPASSIGN_OPS(__SYCL_MIXIN_FOR_BINARY_OPASSIGN, + __SYCL_COMMA) {}; + +template +struct __SYCL_EBO SwizzleTemplateBinaryOpsMixin + : __SYCL_PROCESS_BINARY_OPS(__SYCL_MIXIN_FOR_TEMPLATE_BINARY, + __SYCL_COMMA) {}; + +template +struct __SYCL_EBO SwizzleTemplateBinaryOpAssignOpsMixin + : __SYCL_PROCESS_BINARY_OPASSIGN_OPS( + __SYCL_MIXIN_FOR_TEMPLATE_BINARY_OPASSIGN, __SYCL_COMMA) {}; + +template +struct __SYCL_EBO UnaryOpsMixin + : __SYCL_PROCESS_UNARY_OPS(__SYCL_MIXIN_FOR_UNARY, __SYCL_COMMA) {}; + +#undef __SYCL_MIXIN_FOR_UNARY +#undef __SYCL_MIXIN_FOR_TEMPLATE_BINARY_OPASSIGN +#undef __SYCL_MIXIN_FOR_BINARY_OPASSIGN +#undef __SYCL_MIXIN_FOR_TEMPLATE_BINARY +#undef __SYCL_MIXIN_FOR_BINARY + +#undef __SYCL_COMMA +#undef __SYCL_PROCESS_BINARY_OPS +#undef __SYCL_PROCESS_UNARY_OPS + +struct SwizzleImpl { +private: + template static constexpr int num_elements() { + if constexpr (is_vec_or_swizzle_v) + return T::size(); + else + return 1; + } + +public: + template + auto operator()(const T0 &Lhs, const T1 &Rhs, OpTy &&Op) { + static_assert(std::is_same_v, get_elem_type_t>); + constexpr auto N = (std::max)(num_elements(), num_elements()); + using ResultVec = vec, N>; + return Op(static_cast(Lhs), static_cast(Rhs)); + } + template auto operator()(const T &X, OpTy &&Op) { + using ResultVec = vec; + return Op(static_cast(X)); + } +}; + +struct VectorImpl { +private: +#ifdef __SYCL_DEVICE_ONLY__ + static constexpr bool is_host = false; +#else + static constexpr bool is_host = true; +#endif + + template static constexpr int num_elements() { + if constexpr (is_vec_or_swizzle_v) + return T::size(); + else + return 1; + } + +public: + template + auto operator()(const T0 &Lhs, const T1 &Rhs, OpTy &&Op) { + static_assert(std::is_same_v, get_elem_type_t>); + constexpr auto N = (std::max)(num_elements(), num_elements()); + using DataT = get_elem_type_t; + constexpr bool is_logical = + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v>; + auto Get = [](const auto &a, int idx) { + if constexpr (is_vec_v>>) + return a[idx]; + else + return a; + }; + using ResultVec = + vec, + DataT>, + N>; + if constexpr (is_host || std::is_same_v || + std::is_same_v || is_logical) { + // TODO: Optimized device impl for `is_logical`. + ResultVec tmp{}; + for (int i = 0; i < N; ++i) + if constexpr (is_logical) + tmp[i] = Op(Get(Lhs, i), Get(Rhs, i)) ? -1 : 0; + else + tmp[i] = Op(Get(Lhs, i), Get(Rhs, i)); + return tmp; + } else { + using vec_t = vec; + using vector_t = typename vec_t::vector_t; + if constexpr (is_logical) { + return ResultVec{static_cast( + Op(static_cast(vec_t{Lhs}), + static_cast(vec_t{Rhs})))}; + } else { + return ResultVec{Op(static_cast(vec_t{Lhs}), + static_cast(vec_t{Rhs}))}; + } + } + } + template auto operator()(const T &X, OpTy &&Op) { + // TODO: optimized device impl. + constexpr bool is_logical = std::is_same_v>; + if constexpr (is_logical) { + vec, + T::size()> + tmp; + for (int i = 0; i < T::size(); ++i) + tmp[i] = Op(X[i]) ? -1 : 0; + return tmp; + } else { + T tmp; + for (int i = 0; i < T::size(); ++i) + tmp[i] = Op(X[i]); + return tmp; + } + } }; +template +struct __SYCL_EBO SwizzleMixins + : public NamedSwizzlesMixinConst, + public NonTemplateBinaryOpsMixin, + public NonTemplateBinaryOpsMixin, + public NonTemplateBinaryOpsMixin, SwizzleImpl, DataT>, + public NonTemplateBinaryOpsMixin, Self, SwizzleImpl, DataT>, + public UnaryOpsMixin, + public SwizzleTemplateBinaryOpsMixin {}; + +template +struct __SYCL_EBO SwizzleMixins + : public SwizzleMixins, + public NonTemplateBinaryOpAssignOpsMixin, + public NonTemplateBinaryOpAssignOpsMixin, + DataT>, + // The next line isn't in the spec (yet?) + public NonTemplateBinaryOpAssignOpsMixin, Self, DataT>, + public SwizzleTemplateBinaryOpAssignOpsMixin {}; + +template +inline constexpr bool is_assignable_swizzle = + !std::is_const_v && []() constexpr { + int Idxs[] = {Indexes...}; + for (std::size_t i = 1; i < sizeof...(Indexes); ++i) { + for (std::size_t j = 0; j < i; ++j) + if (Idxs[j] == Idxs[i]) + // Repeating index + return false; + } + + return true; + }(); + +template class __SYCL_EBO Swizzle; + +template +struct SwizzleBase { + const Self &operator=(const Self &) = delete; +}; + +template +struct SwizzleBase { + template + void load(size_t offset, + multi_ptr ptr) const { + vec v; + v.load(offset, ptr); + *static_cast(this) = v; + } + + template + std::enable_if_t && + sizeof...(OtherIndexes) == N, + const Self &> + operator=(const Swizzle &rhs) { + return (*this = static_cast>(rhs)); + } + + const Self &operator=(const vec &rhs) const { + for (int i = 0; i < N; ++i) + (*static_cast(this))[i] = rhs[i]; + + return *static_cast(this); + } + + const Self &operator=(const DataT &rhs) const { + for (int i = 0; i < N; ++i) + (*static_cast(this))[i] = rhs; + + return *static_cast(this); + } +}; + +template +class __SYCL_EBO Swizzle + : public SwizzleBase, typename VecT::element_type, + sizeof...(Indexes), + is_assignable_swizzle>, + public ScalarConversionOperatorMixIn, + typename VecT::element_type, + sizeof...(Indexes)>, + public IncDecMixin, + typename VecT::element_type, + is_assignable_swizzle>, + public ByteShiftsMixin, + const Swizzle, + typename VecT::element_type, sizeof...(Indexes), + is_assignable_swizzle>, + public SwizzleMixins, VecT, + typename VecT::element_type, sizeof...(Indexes), + is_assignable_swizzle> { + using DataT = typename VecT::element_type; + static constexpr int NumElements = sizeof...(Indexes); + using ResultVec = vec; + + // Get underlying vec index for (*this)[idx] access. + static constexpr auto get_vec_idx(int idx) { + int counter = 0; + int result = -1; + ((result = counter++ == idx ? Indexes : result), ...); + return result; + } + +public: + using SwizzleBase, typename VecT::element_type, + sizeof...(Indexes), + is_assignable_swizzle>::operator=; + + using element_type = DataT; + using value_type = DataT; + +#ifdef __SYCL_DEVICE_ONLY__ + using vector_t = typename ResultVec::vector_t; +#endif + + Swizzle() = delete; + Swizzle(const Swizzle &) = delete; + + explicit Swizzle(VecT &Vec) : Vec(Vec) {} + +#ifdef __SYCL_DEVICE_ONLY__ + operator vector_t() const { + return static_cast(static_cast(*this)); + } +#endif + + static constexpr size_t byte_size() noexcept { + return ResultVec::byte_size(); + } + static constexpr size_t size() noexcept { return ResultVec::size(); } + + __SYCL2020_DEPRECATED( + "get_size() is deprecated, please use byte_size() instead") + size_t get_size() const { return static_cast(*this).get_size(); } + + __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead") + size_t get_count() const { + return static_cast(*this).get_count(); + }; + + template + vec convert() const { + return static_cast(*this) + .template convert(); + } + + template asT as() const { + return static_cast(*this).template as(); + } + + template + void store(size_t offset, + multi_ptr ptr) const { + return static_cast(*this).store(offset, ptr); + } + + operator ResultVec() const { return ResultVec{Vec[Indexes]...}; } + + template auto swizzle() const { + return Vec.template swizzle(); + } + + auto &operator[](int index) const { return Vec[get_vec_idx(index)]; } + +public: + VecT &Vec; +}; } // namespace detail ///////////////////////// class sycl::vec ///////////////////////// @@ -126,9 +775,28 @@ struct ScalarConversionOperatorMixIn> { // SYCL devices as well as in host C++ code. template class __SYCL_EBO vec - : public detail::vec_arith, - public detail::ScalarConversionOperatorMixIn, - DataT, NumElements> { + : public detail::ScalarConversionOperatorMixIn, + DataT, NumElements>, + public detail::IncDecMixin, DataT, + /* AllowAssignOps = */ true>, + public detail::ByteShiftsMixin< + vec, vec, DataT, NumElements, + /* AllowAssignOps = */ true>, + public detail::NamedSwizzlesMixinBoth, + NumElements>, + public detail::NonTemplateBinaryOpsMixin, + vec, + detail::VectorImpl, DataT>, + public detail::NonTemplateBinaryOpsMixin, DataT, + detail::VectorImpl, DataT>, + public detail::NonTemplateBinaryOpsMixin, + detail::VectorImpl, DataT>, + public detail::UnaryOpsMixin, detail::VectorImpl, + DataT>, + public detail::NonTemplateBinaryOpAssignOpsMixin< + vec, vec, DataT>, + public detail::NonTemplateBinaryOpAssignOpsMixin, + DataT, DataT> { static_assert(NumElements == 1 || NumElements == 2 || NumElements == 3 || NumElements == 4 || NumElements == 8 || NumElements == 16, @@ -172,7 +840,8 @@ class __SYCL_EBO vec static constexpr int getNumElements() { return NumElements; } - // SizeChecker is needed for vec(const argTN &... args) ctor to validate args. + // SizeChecker is needed for vec(const argTN &... args) ctor to validate + // args. template struct SizeChecker : std::conditional_t {}; @@ -189,36 +858,35 @@ class __SYCL_EBO vec VecToArray(const vec &V, std::index_sequence) { return {static_cast(V[Is])...}; } - template class T4, int... T5, std::size_t... Is> + template static constexpr std::array - VecToArray(const detail::SwizzleOp, T2, T3, T4, T5...> &V, + VecToArray(const detail::Swizzle, T5...> &V, std::index_sequence) { - return {static_cast(V.getValue(Is))...}; + return {static_cast(V[Is])...}; } - template class T4, int... T5, std::size_t... Is> + template static constexpr std::array - VecToArray(const detail::SwizzleOp, T2, T3, T4, T5...> &V, + VecToArray(const detail::Swizzle, T5...> &V, std::index_sequence) { - return {static_cast(V.getValue(Is))...}; + return {static_cast(V[Is])...}; } + template static constexpr std::array FlattenVecArgHelper(const vec &A) { return VecToArray(A, std::make_index_sequence()); } - template class T4, int... T5> - static constexpr std::array FlattenVecArgHelper( - const detail::SwizzleOp, T2, T3, T4, T5...> &A) { - return VecToArray(A, std::make_index_sequence()); + template + static constexpr std::array + FlattenVecArgHelper(const detail::Swizzle, Indexes_...> &A) { + return VecToArray(A, + std::make_index_sequence()); } - template class T4, int... T5> - static constexpr std::array FlattenVecArgHelper( - const detail::SwizzleOp, T2, T3, T4, T5...> &A) { - return VecToArray(A, std::make_index_sequence()); + template + static constexpr std::array + FlattenVecArgHelper(const detail::Swizzle, Indexes_...> &A) { + return VecToArray(A, + std::make_index_sequence()); } template static constexpr auto FlattenVecArgHelper(const T &A) { @@ -244,27 +912,23 @@ class __SYCL_EBO vec Counter + (num_elements) <= MaxValue, \ SizeChecker, \ std::false_type> {}; \ - template class T4, int... T5, \ + template \ - struct SizeChecker< \ - Counter, MaxValue, \ - detail::SwizzleOp, T2, T3, T4, T5...>, \ - tail...> \ + struct SizeChecker, Indexes_...>, \ + tail...> \ : std::conditional_t< \ - Counter + sizeof...(T5) <= MaxValue, \ - SizeChecker, \ + Counter + sizeof...(Indexes_) <= MaxValue, \ + SizeChecker, \ std::false_type> {}; \ - template class T4, int... T5, \ + template \ struct SizeChecker< \ Counter, MaxValue, \ - detail::SwizzleOp, T2, T3, T4, T5...>, \ - tail...> \ + detail::Swizzle, Indexes_...>, tail...> \ : std::conditional_t< \ - Counter + sizeof...(T5) <= MaxValue, \ - SizeChecker, \ + Counter + sizeof...(Indexes_) <= MaxValue, \ + SizeChecker, \ std::false_type> {}; __SYCL_ALLOW_VECTOR_SIZES(1) @@ -275,22 +939,20 @@ class __SYCL_EBO vec __SYCL_ALLOW_VECTOR_SIZES(16) #undef __SYCL_ALLOW_VECTOR_SIZES - // TypeChecker is needed for vec(const argTN &... args) ctor to validate args. + // TypeChecker is needed for vec(const argTN &... args) ctor to validate + // args. template struct TypeChecker : std::is_convertible {}; #define __SYCL_ALLOW_VECTOR_TYPES(num_elements) \ template \ struct TypeChecker, DataT_> : std::true_type {}; \ - template class T4, int... T5> \ + template \ + struct TypeChecker, Indexes_...>, \ + DataT_> : std::true_type {}; \ + template \ struct TypeChecker< \ - detail::SwizzleOp, T2, T3, T4, T5...>, DataT_> \ - : std::true_type {}; \ - template class T4, int... T5> \ - struct TypeChecker< \ - detail::SwizzleOp, T2, T3, T4, T5...>, \ - DataT_> : std::true_type {}; + detail::Swizzle, Indexes_...>, DataT_> \ + : std::true_type {}; __SYCL_ALLOW_VECTOR_TYPES(1) __SYCL_ALLOW_VECTOR_TYPES(2) @@ -300,15 +962,10 @@ class __SYCL_EBO vec __SYCL_ALLOW_VECTOR_TYPES(16) #undef __SYCL_ALLOW_VECTOR_TYPES - template - using Swizzle = - detail::SwizzleOp, detail::GetOp, - detail::GetOp, Indexes...>; + template using Swizzle = detail::Swizzle; template - using ConstSwizzle = - detail::SwizzleOp, detail::GetOp, - detail::GetOp, Indexes...>; + using ConstSwizzle = detail::Swizzle; // Shortcuts for args validation in vec(const argTN &... args) ctor. template @@ -526,38 +1183,18 @@ class __SYCL_EBO vec template asT as() const { return sycl::bit_cast(*this); } template Swizzle swizzle() { - return this; + return Swizzle{*this}; } template ConstSwizzle swizzle() const { - return this; + return ConstSwizzle{*this}; } const DataT &operator[](int i) const { return m_Data[i]; } DataT &operator[](int i) { return m_Data[i]; } - // Begin hi/lo, even/odd, xyzw, and rgba swizzles. @{ -private: - // Indexer used in the swizzles.def - // Currently it is defined as a template struct. Replacing it with a constexpr - // function would activate a bug in MSVC that is fixed only in v19.20. - // Until then MSVC does not recognize such constexpr functions as const and - // thus does not let using them in template parameters inside swizzle.def. - template struct Indexer { - static constexpr int value = Index; - }; - -public: -#ifdef __SYCL_ACCESS_RETURN -#error "Undefine __SYCL_ACCESS_RETURN macro" -#endif -#define __SYCL_ACCESS_RETURN this -#include "swizzles.def" -#undef __SYCL_ACCESS_RETURN - // }@ End of hi/lo, even/odd, xyzw, and rgba swizzles. - template void load(size_t Offset, multi_ptr Ptr) { for (int I = 0; I < NumElements; I++) { @@ -619,15 +1256,8 @@ class __SYCL_EBO vec // the element type in bytes multiplied by the number of elements." static constexpr int alignment = (std::min)((size_t)64, sizeof(DataType)); alignas(alignment) DataType m_Data; - // friends - template class T4, - int... T5> - friend class detail::SwizzleOp; template friend class __SYCL_EBO vec; - // To allow arithmetic operators access private members of vec. - template friend class detail::vec_arith; - template friend class detail::vec_arith_common; }; ///////////////////////// class sycl::vec ///////////////////////// @@ -638,845 +1268,5 @@ template vec; #endif -namespace detail { - -// Special type for working SwizzleOp with scalars, stores a scalar and gives -// the scalar at any index. Provides interface is compatible with SwizzleOp -// operations -template class GetScalarOp { -public: - using DataT = T; - GetScalarOp(DataT Data) : m_Data(Data) {} - DataT getValue(size_t) const { return m_Data; } - -private: - DataT m_Data; -}; -template -using rel_t = detail::select_cl_scalar_integral_signed_t; - -template struct EqualTo { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs == Rhs) ? -1 : 0; - } -}; - -template struct NotEqualTo { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs != Rhs) ? -1 : 0; - } -}; - -template struct GreaterEqualTo { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs >= Rhs) ? -1 : 0; - } -}; - -template struct LessEqualTo { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs <= Rhs) ? -1 : 0; - } -}; - -template struct GreaterThan { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs > Rhs) ? -1 : 0; - } -}; - -template struct LessThan { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs < Rhs) ? -1 : 0; - } -}; - -template struct LogicalAnd { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs && Rhs) ? -1 : 0; - } -}; - -template struct LogicalOr { - constexpr rel_t operator()(const T &Lhs, const T &Rhs) const { - return (Lhs || Rhs) ? -1 : 0; - } -}; - -template struct RShift { - constexpr T operator()(const T &Lhs, const T &Rhs) const { - return Lhs >> Rhs; - } -}; - -template struct LShift { - constexpr T operator()(const T &Lhs, const T &Rhs) const { - return Lhs << Rhs; - } -}; - -///////////////////////// class SwizzleOp ///////////////////////// -// SwizzleOP represents expression templates that operate on vec. -// Actual computation performed on conversion or assignment operators. -template class OperationCurrentT, int... Indexes> -class SwizzleOp { - using DataT = typename VecT::element_type; - // Certain operators return a vector with a different element type. Also, the - // left and right operand types may differ. CommonDataT selects a result type - // based on these types to ensure that the result value can be represented. - // - // Example 1: - // sycl::vec vec{...}; - // auto result = 300u + vec.x(); - // - // CommonDataT is std::common_type_t since - // it's larger than unsigned char. - // - // Example 2: - // sycl::vec vec{...}; - // auto result = vec.template swizzle() && vec; - // - // CommonDataT is DataT since operator&& returns a vector with element type - // int8_t, which is larger than bool. - // - // Example 3: - // sycl::vec vec{...}; auto swlo = vec.lo(); - // auto result = swlo == swlo; - // - // CommonDataT is DataT since operator== returns a vector with element type - // int8_t, which is the same size as std::byte. std::common_type_t - // can't be used here since there's no type that int8_t and std::byte can both - // be implicitly converted to. - using OpLeftDataT = typename OperationLeftT::DataT; - using OpRightDataT = typename OperationRightT::DataT; - using CommonDataT = std::conditional_t< - sizeof(DataT) >= sizeof(std::common_type_t), - DataT, std::common_type_t>; - static constexpr int getNumElements() { return sizeof...(Indexes); } - - using rel_t = detail::rel_t; - using vec_t = vec; - using vec_rel_t = vec; - - template class OperationCurrentT_, int... Idx_> - using NewLHOp = SwizzleOp, - OperationRightT_, OperationCurrentT_, Idx_...>; - - template class OperationCurrentT_, int... Idx_> - using NewRelOp = SwizzleOp, - SwizzleOp, - OperationRightT_, OperationCurrentT_, Idx_...>; - - template class OperationCurrentT_, int... Idx_> - using NewRHOp = SwizzleOp, - OperationCurrentT_, Idx_...>; - - template - using EnableIfOneIndex = typename std::enable_if_t< - 1 == IdxNum && SwizzleOp::getNumElements() == IdxNum, T>; - - template - using EnableIfMultipleIndexes = typename std::enable_if_t< - 1 != IdxNum && SwizzleOp::getNumElements() == IdxNum, T>; - - template - using EnableIfScalarType = typename std::enable_if_t< - std::is_convertible_v && - (std::is_fundamental_v || - detail::is_half_or_bf16_v>)>; - - template - using EnableIfNoScalarType = typename std::enable_if_t< - !std::is_convertible_v || - !(std::is_fundamental_v || - detail::is_half_or_bf16_v>)>; - - template - using Swizzle = - SwizzleOp, GetOp, GetOp, Indices...>; - - template - using ConstSwizzle = - SwizzleOp, GetOp, GetOp, Indices...>; - -public: - using element_type = DataT; - using value_type = DataT; - -#ifdef __SYCL_DEVICE_ONLY__ - using vector_t = typename vec_t::vector_t; -#endif // __SYCL_DEVICE_ONLY__ - - const DataT &operator[](int i) const { - std::array Idxs{Indexes...}; - return (*m_Vector)[Idxs[i]]; - } - - template - std::enable_if_t, DataT> &operator[](int i) { - std::array Idxs{Indexes...}; - return (*m_Vector)[Idxs[i]]; - } - - __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead") - size_t get_count() const { return size(); } - static constexpr size_t size() noexcept { return getNumElements(); } - - template - __SYCL2020_DEPRECATED( - "get_size() is deprecated, please use byte_size() instead") - size_t get_size() const { - return byte_size(); - } - - template size_t byte_size() const noexcept { - return sizeof(DataT) * (Num == 3 ? 4 : Num); - } - - template , - typename = EnableIfScalarType> - operator T() const { - return getValue(0); - } - - template > - friend NewRHOp, std::multiplies, Indexes...> - operator*(const T &Lhs, const SwizzleOp &Rhs) { - return NewRHOp, std::multiplies, Indexes...>( - Rhs.m_Vector, GetScalarOp(Lhs), Rhs); - } - - template > - friend NewRHOp, std::plus, Indexes...> - operator+(const T &Lhs, const SwizzleOp &Rhs) { - return NewRHOp, std::plus, Indexes...>( - Rhs.m_Vector, GetScalarOp(Lhs), Rhs); - } - - template > - friend NewRHOp, std::divides, Indexes...> - operator/(const T &Lhs, const SwizzleOp &Rhs) { - return NewRHOp, std::divides, Indexes...>( - Rhs.m_Vector, GetScalarOp(Lhs), Rhs); - } - - // TODO: Check that Rhs arg is suitable. -#ifdef __SYCL_OPASSIGN -#error "Undefine __SYCL_OPASSIGN macro." -#endif -#define __SYCL_OPASSIGN(OPASSIGN, OP) \ - friend const SwizzleOp &operator OPASSIGN(const SwizzleOp & Lhs, \ - const DataT & Rhs) { \ - Lhs.operatorHelper(vec_t(Rhs)); \ - return Lhs; \ - } \ - template \ - friend const SwizzleOp &operator OPASSIGN(const SwizzleOp & Lhs, \ - const RhsOperation & Rhs) { \ - Lhs.operatorHelper(Rhs); \ - return Lhs; \ - } \ - friend const SwizzleOp &operator OPASSIGN(const SwizzleOp & Lhs, \ - const vec_t & Rhs) { \ - Lhs.operatorHelper(Rhs); \ - return Lhs; \ - } - - __SYCL_OPASSIGN(+=, std::plus) - __SYCL_OPASSIGN(-=, std::minus) - __SYCL_OPASSIGN(*=, std::multiplies) - __SYCL_OPASSIGN(/=, std::divides) - __SYCL_OPASSIGN(%=, std::modulus) - __SYCL_OPASSIGN(&=, std::bit_and) - __SYCL_OPASSIGN(|=, std::bit_or) - __SYCL_OPASSIGN(^=, std::bit_xor) - __SYCL_OPASSIGN(>>=, RShift) - __SYCL_OPASSIGN(<<=, LShift) -#undef __SYCL_OPASSIGN - -#ifdef __SYCL_UOP -#error "Undefine __SYCL_UOP macro" -#endif -#define __SYCL_UOP(UOP, OPASSIGN) \ - friend const SwizzleOp &operator UOP(const SwizzleOp & sv) { \ - sv OPASSIGN static_cast(1); \ - return sv; \ - } \ - friend vec_t operator UOP(const SwizzleOp &sv, int) { \ - vec_t Ret = sv; \ - sv OPASSIGN static_cast(1); \ - return Ret; \ - } - - __SYCL_UOP(++, +=) - __SYCL_UOP(--, -=) -#undef __SYCL_UOP - - template - friend typename std::enable_if_t< - std::is_same_v && !detail::is_vgenfloat_v, vec_t> - operator~(const SwizzleOp &Rhs) { - vec_t Tmp = Rhs; - return ~Tmp; - } - - friend vec_rel_t operator!(const SwizzleOp &Rhs) { - vec_t Tmp = Rhs; - return !Tmp; - } - - friend vec_t operator+(const SwizzleOp &Rhs) { - vec_t Tmp = Rhs; - return +Tmp; - } - - friend vec_t operator-(const SwizzleOp &Rhs) { - vec_t Tmp = Rhs; - return -Tmp; - } - -// scalar BINOP vec<> -// scalar BINOP SwizzleOp -// vec<> BINOP SwizzleOp -#ifdef __SYCL_BINOP -#error "Undefine __SYCL_BINOP macro" -#endif -#define __SYCL_BINOP(BINOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP( \ - const DataT & Lhs, const SwizzleOp & Rhs) { \ - vec_t Tmp = Rhs; \ - return Lhs BINOP Tmp; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const SwizzleOp & Lhs, \ - const DataT & Rhs) { \ - vec_t Tmp = Lhs; \ - return Tmp BINOP Rhs; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP( \ - const vec_t & Lhs, const SwizzleOp & Rhs) { \ - vec_t Tmp = Rhs; \ - return Lhs BINOP Tmp; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_t> operator BINOP(const SwizzleOp & Lhs, \ - const vec_t & Rhs) { \ - vec_t Tmp = Lhs; \ - return Tmp BINOP Rhs; \ - } - - __SYCL_BINOP(+, (!detail::is_byte_v)) - __SYCL_BINOP(-, (!detail::is_byte_v)) - __SYCL_BINOP(*, (!detail::is_byte_v)) - __SYCL_BINOP(/, (!detail::is_byte_v)) - __SYCL_BINOP(%, (!detail::is_byte_v)) - __SYCL_BINOP(&, true) - __SYCL_BINOP(|, true) - __SYCL_BINOP(^, true) - // We have special <<, >> operators for std::byte. - __SYCL_BINOP(>>, (!detail::is_byte_v)) - __SYCL_BINOP(<<, (!detail::is_byte_v)) - - template - friend std::enable_if_t, vec_t> - operator>>(const SwizzleOp &Lhs, const int shift) { - vec_t Tmp = Lhs; - return Tmp >> shift; - } - - template - friend std::enable_if_t, vec_t> - operator<<(const SwizzleOp &Lhs, const int shift) { - vec_t Tmp = Lhs; - return Tmp << shift; - } -#undef __SYCL_BINOP - -// scalar RELLOGOP vec<> -// scalar RELLOGOP SwizzleOp -// vec<> RELLOGOP SwizzleOp -#ifdef __SYCL_RELLOGOP -#error "Undefine __SYCL_RELLOGOP macro" -#endif -#define __SYCL_RELLOGOP(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ - const DataT & Lhs, const SwizzleOp & Rhs) { \ - vec_t Tmp = Rhs; \ - return Lhs RELLOGOP Tmp; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ - const SwizzleOp & Lhs, const DataT & Rhs) { \ - vec_t Tmp = Lhs; \ - return Tmp RELLOGOP Rhs; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ - const vec_t & Lhs, const SwizzleOp & Rhs) { \ - vec_t Tmp = Rhs; \ - return Lhs RELLOGOP Tmp; \ - } \ - template \ - friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ - const SwizzleOp & Lhs, const vec_t & Rhs) { \ - vec_t Tmp = Lhs; \ - return Tmp RELLOGOP Rhs; \ - } - - __SYCL_RELLOGOP(==, (!detail::is_byte_v)) - __SYCL_RELLOGOP(!=, (!detail::is_byte_v)) - __SYCL_RELLOGOP(>, (!detail::is_byte_v)) - __SYCL_RELLOGOP(<, (!detail::is_byte_v)) - __SYCL_RELLOGOP(>=, (!detail::is_byte_v)) - __SYCL_RELLOGOP(<=, (!detail::is_byte_v)) - __SYCL_RELLOGOP(&&, (!detail::is_byte_v && !detail::is_vgenfloat_v)) - __SYCL_RELLOGOP(||, (!detail::is_byte_v && !detail::is_vgenfloat_v)) -#undef __SYCL_RELLOGOP - - template > - SwizzleOp &operator=(const vec &Rhs) { - std::array Idxs{Indexes...}; - for (size_t I = 0; I < Idxs.size(); ++I) { - (*m_Vector)[Idxs[I]] = Rhs[I]; - } - return *this; - } - - template > - SwizzleOp &operator=(const DataT &Rhs) { - std::array Idxs{Indexes...}; - (*m_Vector)[Idxs[0]] = Rhs; - return *this; - } - - template = true> - SwizzleOp &operator=(const DataT &Rhs) { - std::array Idxs{Indexes...}; - for (auto Idx : Idxs) { - (*m_Vector)[Idx] = Rhs; - } - return *this; - } - - template > - SwizzleOp &operator=(DataT &&Rhs) { - std::array Idxs{Indexes...}; - (*m_Vector)[Idxs[0]] = Rhs; - return *this; - } - - template > - NewLHOp, std::multiplies, Indexes...> - operator*(const T &Rhs) const { - return NewLHOp, std::multiplies, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator*(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, std::plus, Indexes...> operator+(const T &Rhs) const { - return NewLHOp, std::plus, Indexes...>(m_Vector, *this, - GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator+(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template > - NewLHOp, std::minus, Indexes...> - operator-(const T &Rhs) const { - return NewLHOp, std::minus, Indexes...>(m_Vector, *this, - GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator-(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template > - NewLHOp, std::divides, Indexes...> - operator/(const T &Rhs) const { - return NewLHOp, std::divides, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator/(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, std::modulus, Indexes...> - operator%(const T &Rhs) const { - return NewLHOp, std::modulus, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator%(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, std::bit_and, Indexes...> - operator&(const T &Rhs) const { - return NewLHOp, std::bit_and, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator&(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, std::bit_or, Indexes...> - operator|(const T &Rhs) const { - return NewLHOp, std::bit_or, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator|(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template > - NewLHOp, std::bit_xor, Indexes...> - operator^(const T &Rhs) const { - return NewLHOp, std::bit_xor, Indexes...>( - m_Vector, *this, GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator^(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, - Rhs); - } - - template > - NewLHOp, RShift, Indexes...> operator>>(const T &Rhs) const { - return NewLHOp, RShift, Indexes...>(m_Vector, *this, - GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator>>(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template > - NewLHOp, LShift, Indexes...> operator<<(const T &Rhs) const { - return NewLHOp, LShift, Indexes...>(m_Vector, *this, - GetScalarOp(Rhs)); - } - - template > - NewLHOp - operator<<(const RhsOperation &Rhs) const { - return NewLHOp(m_Vector, *this, Rhs); - } - - template < - typename T1, typename T2, typename T3, template class T4, - int... T5, - typename = typename std::enable_if_t> - SwizzleOp &operator=(const SwizzleOp &Rhs) { - std::array Idxs{Indexes...}; - for (size_t I = 0; I < Idxs.size(); ++I) { - (*m_Vector)[Idxs[I]] = Rhs.getValue(I); - } - return *this; - } - - template < - typename T1, typename T2, typename T3, template class T4, - int... T5, - typename = typename std::enable_if_t> - SwizzleOp &operator=(SwizzleOp &&Rhs) { - std::array Idxs{Indexes...}; - for (size_t I = 0; I < Idxs.size(); ++I) { - (*m_Vector)[Idxs[I]] = Rhs.getValue(I); - } - return *this; - } - - template > - NewRelOp, EqualTo, Indexes...> operator==(const T &Rhs) const { - return NewRelOp, EqualTo, Indexes...>(NULL, *this, - GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator==(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, NotEqualTo, Indexes...> - operator!=(const T &Rhs) const { - return NewRelOp, NotEqualTo, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator!=(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, GreaterEqualTo, Indexes...> - operator>=(const T &Rhs) const { - return NewRelOp, GreaterEqualTo, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator>=(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, LessEqualTo, Indexes...> - operator<=(const T &Rhs) const { - return NewRelOp, LessEqualTo, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator<=(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, GreaterThan, Indexes...> - operator>(const T &Rhs) const { - return NewRelOp, GreaterThan, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator>(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, LessThan, Indexes...> operator<(const T &Rhs) const { - return NewRelOp, LessThan, Indexes...>(NULL, *this, - GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator<(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, LogicalAnd, Indexes...> - operator&&(const T &Rhs) const { - return NewRelOp, LogicalAnd, Indexes...>( - NULL, *this, GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator&&(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - template > - NewRelOp, LogicalOr, Indexes...> - operator||(const T &Rhs) const { - return NewRelOp, LogicalOr, Indexes...>(NULL, *this, - GetScalarOp(Rhs)); - } - - template > - NewRelOp - operator||(const RhsOperation &Rhs) const { - return NewRelOp(NULL, *this, Rhs); - } - - // Begin hi/lo, even/odd, xyzw, and rgba swizzles. -private: - // Indexer used in the swizzles.def. - // Currently it is defined as a template struct. Replacing it with a constexpr - // function would activate a bug in MSVC that is fixed only in v19.20. - // Until then MSVC does not recognize such constexpr functions as const and - // thus does not let using them in template parameters inside swizzle.def. - template struct Indexer { - static constexpr int IDXs[sizeof...(Indexes)] = {Indexes...}; - static constexpr int value = IDXs[Index >= getNumElements() ? 0 : Index]; - }; - -public: -#ifdef __SYCL_ACCESS_RETURN -#error "Undefine __SYCL_ACCESS_RETURN macro" -#endif -#define __SYCL_ACCESS_RETURN m_Vector -#include "swizzles.def" -#undef __SYCL_ACCESS_RETURN - // End of hi/lo, even/odd, xyzw, and rgba swizzles. - - // Leave store() interface to automatic conversion to vec<>. - // Load to vec_t and then assign to swizzle. - template - void load(size_t offset, multi_ptr ptr) { - vec_t Tmp; - Tmp.load(offset, ptr); - *this = Tmp; - } - - template - vec convert() const { - // First materialize the swizzle to vec_t and then apply convert() to it. - vec_t Tmp; - std::array Idxs{Indexes...}; - for (size_t I = 0; I < Idxs.size(); ++I) { - Tmp[I] = (*m_Vector)[Idxs[I]]; - } - return Tmp.template convert(); - } - - template asT as() const { - // First materialize the swizzle to vec_t and then apply as() to it. - vec_t Tmp = *this; - static_assert((sizeof(Tmp) == sizeof(asT)), - "The new SYCL vec type must have the same storage size in " - "bytes as this SYCL swizzled vec"); - static_assert( - detail::is_contained::value || - detail::is_contained::value, - "asT must be SYCL vec of a different element type and " - "number of elements specified by asT"); - return Tmp.template as(); - } - -private: - SwizzleOp(const SwizzleOp &Rhs) - : m_Vector(Rhs.m_Vector), m_LeftOperation(Rhs.m_LeftOperation), - m_RightOperation(Rhs.m_RightOperation) {} - - SwizzleOp(VecT *Vector, OperationLeftT LeftOperation, - OperationRightT RightOperation) - : m_Vector(Vector), m_LeftOperation(LeftOperation), - m_RightOperation(RightOperation) {} - - SwizzleOp(VecT *Vector) : m_Vector(Vector) {} - - SwizzleOp(SwizzleOp &&Rhs) - : m_Vector(Rhs.m_Vector), m_LeftOperation(std::move(Rhs.m_LeftOperation)), - m_RightOperation(std::move(Rhs.m_RightOperation)) {} - - // Either performing CurrentOperation on results of left and right operands - // or reading values from actual vector. Perform implicit type conversion when - // the number of elements == 1 - - template - CommonDataT getValue(EnableIfOneIndex Index) const { - if (std::is_same_v, GetOp>) { - std::array Idxs{Indexes...}; - return (*m_Vector)[Idxs[Index]]; - } - auto Op = OperationCurrentT(); - return Op(m_LeftOperation.getValue(Index), - m_RightOperation.getValue(Index)); - } - - template - DataT getValue(EnableIfMultipleIndexes Index) const { - if (std::is_same_v, GetOp>) { - std::array Idxs{Indexes...}; - return (*m_Vector)[Idxs[Index]]; - } - auto Op = OperationCurrentT(); - return Op(m_LeftOperation.getValue(Index), - m_RightOperation.getValue(Index)); - } - - template