From 0aa7a9a1b624a5b42255d199782e1b0fe6f7f996 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 18 Apr 2024 16:28:46 -0700 Subject: [PATCH 01/18] Add copy constructor --- sycl/source/detail/scheduler/commands.cpp | 1 + xpti/src/xpti_proxy.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 056a4239fde1c..3a48fd50f8259 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -81,6 +81,7 @@ struct DemangleHandle { char *p; DemangleHandle(char *ptr) : p(ptr) {} + DemangleHandle(const DemangleHandle &) = delete; DemangleHandle &operator=(const DemangleHandle &) = delete; ~DemangleHandle() { std::free(p); } diff --git a/xpti/src/xpti_proxy.cpp b/xpti/src/xpti_proxy.cpp index a09b970060033..9d91a10bd5d00 100644 --- a/xpti/src/xpti_proxy.cpp +++ b/xpti/src/xpti_proxy.cpp @@ -100,6 +100,7 @@ class ProxyLoader { tryToEnable(); } + ProxyLoader(const ProxyLoader &) = delete; ProxyLoader &operator=(const ProxyLoader &) = delete; ~ProxyLoader() { From 15f309495cc0f9d004dba467e44816e429d75685 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 28 May 2024 17:22:46 -0700 Subject: [PATCH 02/18] Refactor vector/byte.cpp E2E test --- sycl/test-e2e/Basic/vector/byte.cpp | 449 +++++++++++++++------------- 1 file changed, 249 insertions(+), 200 deletions(-) diff --git a/sycl/test-e2e/Basic/vector/byte.cpp b/sycl/test-e2e/Basic/vector/byte.cpp index 4611da03110fa..c64817007f1e6 100644 --- a/sycl/test-e2e/Basic/vector/byte.cpp +++ b/sycl/test-e2e/Basic/vector/byte.cpp @@ -19,7 +19,7 @@ #include // std::ignore int main() { - std::byte bt{7}; + std::byte bt{2}; // constructors sycl::vec vb1(bt); sycl::vec vb2{bt, bt}; @@ -47,8 +47,20 @@ int main() { auto cnv = vi2.convert(); auto cnv2 = vb1.convert(); + assert(cnv[0] == std::byte{1} && cnv[1] == std::byte{1}); + assert(cnv2[0] == 3); + auto asint = vb2.template as>(); auto asbyte = vi2.template as>(); + + // 0000 0010 0000 0010 = 514 + assert(asint[0] == 514); + + // 0000 0000 0000 0001 0000 0000 0000 0001 + assert(asbyte[0] == std::byte{1} && asbyte[1] == std::byte{0} && + asbyte[2] == std::byte{0} && asbyte[3] == std::byte{0} && + asbyte[4] == std::byte{1} && asbyte[5] == std::byte{0} && + asbyte[6] == std::byte{0} && asbyte[7] == std::byte{0}); } // load() and store() @@ -78,7 +90,7 @@ int main() { .wait(); } assert(std_vec[0] == std::byte{2}); - assert(std_vec[1] == std::byte{7}); + assert(std_vec[1] == std::byte{2}); // swizzle { @@ -92,212 +104,249 @@ int main() { // hi/lo, even/odd sycl::vec vbsw(std::byte{0}, std::byte{1}, std::byte{2}, std::byte{3}); + sycl::vec vbswhi = vbsw.hi(); - assert(vbswhi[0] == std::byte{2}); + assert(vbswhi[0] == std::byte{2} && vbswhi[1] == std::byte{3}); + vbswhi = vbsw.lo(); + assert(vbswhi[0] == std::byte{0} && vbswhi[1] == std::byte{1}); + vbswhi = vbsw.odd(); + assert(vbswhi[0] == std::byte{1} && vbswhi[1] == std::byte{3}); + vbswhi = vbsw.even(); + assert(vbswhi[0] == std::byte{0} && vbswhi[1] == std::byte{2}); } // operatorOP for vec and for swizzle { - sycl::vec vop1{std::byte{4}, std::byte{9}, std::byte{25}}; - sycl::vec vop2{std::byte{2}, std::byte{3}, std::byte{5}}; - sycl::vec vop3{std::byte{5}, std::byte{6}, std::byte{2}, - std::byte{3}}; - - // binary op for 2 vec - auto vop = vop1 + vop2; - assert(vop[0] == std::byte{6}); - vop = vop1 - vop2; - vop = vop1 * vop2; - vop = vop1 / vop2; - assert(vop[0] == std::byte{2}); - vop = vop1 % vop2; - - // binary op for 2 swizzle - auto swlo = vop3.lo(); - auto swhi = vop3.hi(); - auto swplus = swlo + swhi; - sycl::vec vec_test = swplus; - assert(vec_test.x() == std::byte{7} && vec_test.y() == std::byte{9}); - auto swominus = swlo - swhi; - auto swmul = swlo * swhi; - vec_test = swmul; - assert(vec_test.x() == std::byte{10} && vec_test.y() == std::byte{18}); - auto swdiv = swlo / swhi; - - // binary op for 1 vec - vop = vop1 + std::byte{3}; - vop = vop1 - std::byte{3}; - assert(vop[1] == std::byte{6}); - vop = vop1 * std::byte{3}; - vop = vop1 / std::byte{3}; - vop = vop1 % std::byte{3}; - assert(vop[0] == std::byte{1}); - - vop = std::byte{3} + vop1; - assert(vop[0] == std::byte{7}); - vop = std::byte{3} - vop1; - vop = std::byte{3} * vop1; - assert(vop[2] == std::byte{75}); - vop = std::byte{3} / vop1; - - // binary op for 1 swizzle - auto swplus1 = swlo + std::byte{3}; - auto swminus1 = swlo - std::byte{3}; - vec_test = swminus1; - assert(vec_test.x() == std::byte{2} && vec_test.y() == std::byte{3}); - auto swmul1 = swlo * std::byte{3}; - auto swdiv1 = swlo / std::byte{3}; - vec_test = swdiv1; - assert(vec_test.x() == std::byte{1} && vec_test.y() == std::byte{2}); - - auto swplus2 = std::byte{3} + swlo; - vec_test = swplus2; - assert(vec_test.x() == std::byte{8} && vec_test.y() == std::byte{9}); - auto swminus2 = std::byte{3} - swlo; - auto swmul2 = std::byte{3} * swlo; - vec_test = swmul2; - assert(vec_test.x() == std::byte{15} && vec_test.y() == std::byte{18}); - auto swdiv2 = std::byte{3} / swlo; - - // operatorOP= for 2 vec - sycl::vec vbuf{std::byte{4}, std::byte{5}, std::byte{6}}; - vop = vbuf += vop1; - assert(vop[0] == std::byte{8}); - vop = vbuf -= vop1; - vop = vbuf *= vop1; - vop = vbuf /= vop1; - vop = vbuf %= vop1; - - // operatorOP= for 2 swizzle - swlo += swhi; - swlo -= swhi; - vec_test = swlo; - assert(vec_test.x() == std::byte{5} && vec_test.y() == std::byte{6}); - swlo *= swhi; - swlo /= swhi; - swlo %= swhi; - - // operatorOP= for 1 vec - vop = vop1 += std::byte{3}; - assert(vop[0] == std::byte{7}); - vop = vop1 -= std::byte{3}; - vop = vop1 *= std::byte{3}; - vop = vop1 /= std::byte{3}; - vop = vop1 %= std::byte{3}; - - // operatorOP= for 1 swizzle - - swlo += std::byte{3}; - swlo -= std::byte{1}; - vec_test = swlo; - assert(vec_test.x() == std::byte{3} && vec_test.y() == std::byte{2}); - swlo *= std::byte{3}; - swlo /= std::byte{3}; - swlo %= std::byte{3}; - - // unary operator++ and -- for vec - vop1 = sycl::vec(std::byte{4}, std::byte{9}, std::byte{25}); - vop1++; - vop1--; - vop = ++vop1; - assert(vop[2] == std::byte{26}); - --vop1; - - // unary operator++ and -- for swizzle - swlo++; - swlo--; - vec_test = swlo; - assert(vec_test.x() == std::byte{0} && vec_test.y() == std::byte{2}); - - // logical binary op for 2 vec - vop = vop1 & vop2; - vop = vop1 | vop2; - vop = vop1 ^ vop2; - - // logical binary op for 2 swizzle - auto swand = swlo & swhi; - auto swor = swlo | swhi; - auto swxor = swlo ^ swhi; - - // logical binary op for 1 vec - vop = vop1 & std::byte{3}; - vop = vop1 | std::byte{3}; - vop = vop1 ^ std::byte{3}; - vop = std::byte{3} & vop1; - vop = std::byte{3} | vop1; - vop = std::byte{3} ^ vop1; - - // logical binary op for 1 swizzle - auto swand2 = swlo & std::byte{3}; - auto swor2 = swlo | std::byte{3}; - auto swxor2 = swlo ^ std::byte{3}; - - auto swand3 = std::byte{3} & swlo; - auto swor3 = std::byte{3} | swlo; - auto swxor3 = std::byte{3} ^ swlo; - - // bit binary op for 2 vec - vop = vop1 && vop2; - vop = vop1 || vop2; - vop = vop1 >> vop2; - vop = vop1 << vop2; - - vop = vop1 >> std::byte{3}; - vop = vop1 << std::byte{3}; - vop = std::byte{3} >> vop1; - vop = std::byte{3} << vop1; - - // bit binary op for 2 swizzle - swlo >> swhi; - swlo << swhi; - swlo >> std::byte{3}; - swlo << std::byte{3}; - auto right = std::byte{3} >> swhi; - auto left = std::byte{3} << swhi; - - // condition op for 2 vec - auto vres = vop1 == vop2; - vres = vop1 != vop2; - vres = vop1 > vop2; - vres = vop1 < vop2; - vres = vop1 >= vop2; - vres = vop1 <= vop2; - - vres = vop1 == std::byte{3}; - vres = vop1 != std::byte{3}; - vres = vop1 > std::byte{3}; - vres = vop1 < std::byte{3}; - vres = vop1 >= std::byte{3}; - vres = vop1 <= std::byte{3}; - - vres = std::byte{3} == vop1; - vres = std::byte{3} != vop1; - vres = std::byte{3} > vop1; - vres = std::byte{3} < vop1; - vres = std::byte{3} >= vop1; - vres = std::byte{3} <= vop1; - - // condition op for 2 swizzle - auto swres = swhi == swlo; - auto swres1 = swhi != swlo; - auto swres2 = swhi > swlo; - auto swres3 = swhi < swlo; - auto swres4 = swhi >= swlo; - auto swres5 = swhi <= swlo; - auto swres6 = swhi == std::byte{3}; - auto swres7 = swhi != std::byte{3}; - auto swres8 = swhi > std::byte{3}; - auto swres9 = swhi < std::byte{3}; - auto swres10 = swhi >= std::byte{3}; - auto swres11 = swhi <= std::byte{3}; - - sycl::vec voptest{std::byte{4}, std::byte{9}, std::byte{25}}; - auto bitv1 = ~vop3; - auto bitv2 = !vop3; - auto bitw = ~swhi; + sycl::vec VecByte3A{std::byte{4}, std::byte{9}, + std::byte{25}}; + sycl::vec VecByte3B{std::byte{2}, std::byte{3}, std::byte{5}}; + sycl::vec VecByte4A{std::byte{5}, std::byte{6}, std::byte{2}, + std::byte{3}}; + + // Test bitwise operations on vec and swizzles. + // Adding asserts on vec<> operations, and not swizzle operations, + // should suffice as swizzles just delegates the operation to vec<> + // class. + { + auto SwizByte2A = VecByte4A.lo(); + auto SwizByte2B = VecByte4A.hi(); + + // logical binary op for 2 vec + auto VecByte3And = VecByte3A & VecByte3B; + auto VecByte3Or = VecByte3A | VecByte3B; + auto VecByte3Xor = VecByte3A ^ VecByte3B; + assert(VecByte3And[0] == (VecByte3A[0] & VecByte3B[0])); + assert(VecByte3Or[1] == (VecByte3A[1] | VecByte3B[1])); + assert(VecByte3Xor[2] == (VecByte3A[2] ^ VecByte3B[2])); + + // logical binary op for 2 swizzle + auto swand = SwizByte2A & SwizByte2B; + auto swor = SwizByte2A | SwizByte2B; + auto swxor = SwizByte2A ^ SwizByte2B; + + // Check order of operands for bitwise operators. + auto BitWiseAnd1 = VecByte3A & std::byte{3}; + auto BitWiseOr1 = VecByte3A | std::byte{3}; + auto BitWiseXor1 = VecByte3A ^ std::byte{3}; + auto BitWiseAnd2 = std::byte{3} & VecByte3A; + auto BitWiseOr2 = std::byte{3} | VecByte3A; + auto BitWiseXor2 = std::byte{3} ^ VecByte3A; + assert(BitWiseAnd1[0] == BitWiseAnd2[0]); + assert(BitWiseOr1[1] == BitWiseOr2[1]); + assert(BitWiseXor1[2] == BitWiseXor2[2]); + + // logical binary op for 1 swizzle + auto swand2 = SwizByte2A & std::byte{3}; + auto swor2 = SwizByte2A | std::byte{3}; + auto swxor2 = SwizByte2A ^ std::byte{3}; + + auto swand3 = std::byte{3} & SwizByte2A; + auto swor3 = std::byte{3} | SwizByte2A; + auto swxor3 = std::byte{3} ^ SwizByte2A; + + // bit-wise negation test + auto VecByte4Neg = ~VecByte4A; + assert(VecByte4Neg[0] == ~VecByte4A[0]); + + auto bitw = ~SwizByte2B; + } + + // std::byte is not an arithmetic type or a character type, so std::byte + // and vec should not support artithmetic operations. In the + // new implementation of vec<> class, the following will be removed. + { + // binary op for 2 vec + auto vop = VecByte3A + VecByte3B; + assert(vop[0] == std::byte{6}); + vop = VecByte3A - VecByte3B; + vop = VecByte3A * VecByte3B; + vop = VecByte3A / VecByte3B; + assert(vop[0] == std::byte{2}); + vop = VecByte3A % VecByte3B; + + // binary op for 2 swizzle + auto swlo = VecByte4A.lo(); + auto swhi = VecByte4A.hi(); + auto swplus = swlo + swhi; + sycl::vec vec_test = swplus; + assert(vec_test.x() == std::byte{7} && vec_test.y() == std::byte{9}); + auto swominus = swlo - swhi; + auto swmul = swlo * swhi; + vec_test = swmul; + assert(vec_test.x() == std::byte{10} && vec_test.y() == std::byte{18}); + auto swdiv = swlo / swhi; + + // binary op for 1 vec + vop = VecByte3A + std::byte{3}; + vop = VecByte3A - std::byte{3}; + assert(vop[1] == std::byte{6}); + vop = VecByte3A * std::byte{3}; + vop = VecByte3A / std::byte{3}; + vop = VecByte3A % std::byte{3}; + assert(vop[0] == std::byte{1}); + + vop = std::byte{3} + VecByte3A; + assert(vop[0] == std::byte{7}); + vop = std::byte{3} - VecByte3A; + vop = std::byte{3} * VecByte3A; + assert(vop[2] == std::byte{75}); + vop = std::byte{3} / VecByte3A; + + // binary op for 1 swizzle + auto swplus1 = swlo + std::byte{3}; + auto swminus1 = swlo - std::byte{3}; + vec_test = swminus1; + assert(vec_test.x() == std::byte{2} && vec_test.y() == std::byte{3}); + auto swmul1 = swlo * std::byte{3}; + auto swdiv1 = swlo / std::byte{3}; + vec_test = swdiv1; + assert(vec_test.x() == std::byte{1} && vec_test.y() == std::byte{2}); + + auto swplus2 = std::byte{3} + swlo; + vec_test = swplus2; + assert(vec_test.x() == std::byte{8} && vec_test.y() == std::byte{9}); + auto swminus2 = std::byte{3} - swlo; + auto swmul2 = std::byte{3} * swlo; + vec_test = swmul2; + assert(vec_test.x() == std::byte{15} && vec_test.y() == std::byte{18}); + auto swdiv2 = std::byte{3} / swlo; + + // operatorOP= for 2 vec + sycl::vec vbuf{std::byte{4}, std::byte{5}, std::byte{6}}; + vop = vbuf += VecByte3A; + assert(vop[0] == std::byte{8}); + vop = vbuf -= VecByte3A; + vop = vbuf *= VecByte3A; + vop = vbuf /= VecByte3A; + vop = vbuf %= VecByte3A; + + // operatorOP= for 2 swizzle + swlo += swhi; + swlo -= swhi; + vec_test = swlo; + assert(vec_test.x() == std::byte{5} && vec_test.y() == std::byte{6}); + swlo *= swhi; + swlo /= swhi; + swlo %= swhi; + + // operatorOP= for 1 vec + vop = VecByte3A += std::byte{3}; + assert(vop[0] == std::byte{7}); + vop = VecByte3A -= std::byte{3}; + vop = VecByte3A *= std::byte{3}; + vop = VecByte3A /= std::byte{3}; + vop = VecByte3A %= std::byte{3}; + + // operatorOP= for 1 swizzle + swlo += std::byte{3}; + swlo -= std::byte{1}; + vec_test = swlo; + assert(vec_test.x() == std::byte{3} && vec_test.y() == std::byte{2}); + swlo *= std::byte{3}; + swlo /= std::byte{3}; + swlo %= std::byte{3}; + + // unary operator++ and -- for vec + VecByte3A = + sycl::vec(std::byte{4}, std::byte{9}, std::byte{25}); + VecByte3A++; + VecByte3A--; + vop = ++VecByte3A; + assert(vop[2] == std::byte{26}); + --VecByte3A; + + // unary operator++ and -- for swizzle + swlo++; + swlo--; + vec_test = swlo; + assert(vec_test.x() == std::byte{0} && vec_test.y() == std::byte{2}); + } + + // Logical operations on vec and swizzles. + { + // condition op for 2 vec + auto vres = VecByte3A == VecByte3B; + vres = VecByte3A != VecByte3B; + vres = VecByte3A > VecByte3B; + vres = VecByte3A < VecByte3B; + vres = VecByte3A >= VecByte3B; + vres = VecByte3A <= VecByte3B; + + vres = VecByte3A == std::byte{3}; + vres = VecByte3A != std::byte{3}; + vres = VecByte3A > std::byte{3}; + vres = VecByte3A < std::byte{3}; + vres = VecByte3A >= std::byte{3}; + vres = VecByte3A <= std::byte{3}; + + vres = std::byte{3} == VecByte3A; + vres = std::byte{3} != VecByte3A; + vres = std::byte{3} > VecByte3A; + vres = std::byte{3} < VecByte3A; + vres = std::byte{3} >= VecByte3A; + vres = std::byte{3} <= VecByte3A; + + auto swlo = VecByte4A.lo(); + auto swhi = VecByte4A.hi(); + + // condition op for 2 swizzle + auto swres = swhi == swlo; + auto swres1 = swhi != swlo; + auto swres2 = swhi > swlo; + auto swres3 = swhi < swlo; + auto swres4 = swhi >= swlo; + auto swres5 = swhi <= swlo; + auto swres6 = swhi == std::byte{3}; + auto swres7 = swhi != std::byte{3}; + auto swres8 = swhi > std::byte{3}; + auto swres9 = swhi < std::byte{3}; + auto swres10 = swhi >= std::byte{3}; + auto swres11 = swhi <= std::byte{3}; + + // bit binary operations + auto vop = VecByte3A && VecByte3B; + vop = VecByte3A || VecByte3B; + + auto vop1 = VecByte3A >> VecByte3B; + vop1 = VecByte3A << VecByte3B; + + vop1 = VecByte3A >> std::byte{3}; + vop1 = VecByte3A << std::byte{3}; + vop1 = std::byte{3} >> VecByte3A; + vop1 = std::byte{3} << VecByte3A; + + swlo >> swhi; + swlo << swhi; + swlo >> std::byte{3}; + swlo << std::byte{3}; + auto right = std::byte{3} >> swhi; + auto left = std::byte{3} << swhi; + + auto bitv2 = !VecByte4A; + } } return 0; From 534176061e031f1c72089bd8ade2049901314f11 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 28 May 2024 18:29:39 -0700 Subject: [PATCH 03/18] Restrict vec and swizzle opperations to types mentioned in the SPEC --- .../sycl/detail/generic_type_traits.hpp | 10 + sycl/include/sycl/vector_preview.hpp | 333 +++++++++++------- sycl/test-e2e/Basic/vector/byte.cpp | 27 ++ 3 files changed, 245 insertions(+), 125 deletions(-) diff --git a/sycl/include/sycl/detail/generic_type_traits.hpp b/sycl/include/sycl/detail/generic_type_traits.hpp index 3b0ce7988f576..57f571e1a842f 100644 --- a/sycl/include/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/sycl/detail/generic_type_traits.hpp @@ -252,6 +252,16 @@ inline constexpr bool is_genfloatptr_marray_v = (IsDecorated == access::decorated::yes || IsDecorated == access::decorated::no); +template +using is_byte_t = typename +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + std::is_same; +#else + std::false_type; +#endif + +template inline constexpr bool is_byte_v = is_byte_t::value; + template using make_floating_point_t = make_type_t; diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index f1bf7fcfcc24d..8c1660ec1d338 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -983,13 +983,21 @@ template class vec { } } -#ifdef __SYCL_BINOP -#error "Undefine __SYCL_BINOP macro" +#if defined(__SYCL_BINOP) || defined(BINOP_BASE) +#error "Undefine __SYCL_BINOP and BINOP_BASE macro" #endif -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ -#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \ - friend vec operator BINOP(const vec &Lhs, const vec &Rhs) { \ +// Note: vec<>/SwizzleOp logical value is 0/-1 logic, as opposed to 0/1 logic. +// As far as CTS validation is concerned, 0/-1 logic also applies when +// NumElements is equal to one, which is somewhat inconsistent with being +// transparent with scalar data. +// TODO: Determine if vec<, NumElements=1> is needed at all, remove this +// inconsistency if not by disallowing one-element vectors (as in OpenCL) +#ifdef __SYCL_DEVICE_ONLY__ +#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec> operator BINOP( \ + const vec & Lhs, const vec & Rhs) { \ vec Ret; \ if constexpr (IsUsingArrayOnDevice) { \ for (size_t I = 0; I < NumElements; ++I) { \ @@ -1002,28 +1010,13 @@ template class vec { } \ } \ return Ret; \ - } \ - friend vec operator BINOP(const vec &Lhs, const DataT &Rhs) { \ - return Lhs BINOP vec(Rhs); \ - } \ - friend vec operator BINOP(const DataT &Lhs, const vec &Rhs) { \ - return vec(Lhs) BINOP Rhs; \ - } \ - friend vec &operator OPASSIGN(vec & Lhs, const vec & Rhs) { \ - Lhs = Lhs BINOP Rhs; \ - return Lhs; \ - } \ - template \ - friend typename std::enable_if_t operator OPASSIGN( \ - vec & Lhs, const DataT & Rhs) { \ - Lhs = Lhs BINOP vec(Rhs); \ - return Lhs; \ } +#else // __SYCL_DEVICE_ONLY__ -#else // __SYCL_USE_EXT_VECTOR_TYPE__ - -#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \ - friend vec operator BINOP(const vec &Lhs, const vec &Rhs) { \ +#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec> operator BINOP( \ + const vec & Lhs, const vec & Rhs) { \ vec Ret{}; \ if constexpr (NativeVec) \ Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ @@ -1032,41 +1025,87 @@ template class vec { Ret.setValue(I, (DataT)(vec_data::get(Lhs.getValue( \ I)) BINOP vec_data::get(Rhs.getValue(I)))); \ return Ret; \ - } \ - friend vec operator BINOP(const vec &Lhs, const DataT &Rhs) { \ + } +#endif // __SYCL_DEVICE_ONLY__ + +#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT, COND) \ + BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + \ + template \ + friend typename std::enable_if_t<(COND), vec> operator BINOP( \ + const vec & Lhs, const DataT & Rhs) { \ return Lhs BINOP vec(Rhs); \ } \ - friend vec operator BINOP(const DataT &Lhs, const vec &Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec> operator BINOP( \ + const DataT & Lhs, const vec & Rhs) { \ return vec(Lhs) BINOP Rhs; \ } \ - friend vec &operator OPASSIGN(vec & Lhs, const vec & Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec> &operator OPASSIGN( \ + vec & Lhs, const vec & Rhs) { \ Lhs = Lhs BINOP Rhs; \ return Lhs; \ } \ - template \ - friend typename std::enable_if_t operator OPASSIGN( \ - vec & Lhs, const DataT & Rhs) { \ + template \ + friend typename std::enable_if_t<(Num != 1) && (COND), vec &> \ + operator OPASSIGN(vec & Lhs, const DataT & Rhs) { \ Lhs = Lhs BINOP vec(Rhs); \ return Lhs; \ } -#endif // __SYCL_USE_EXT_VECTOR_TYPE__ - - __SYCL_BINOP(+, +=, true) - __SYCL_BINOP(-, -=, true) - __SYCL_BINOP(*, *=, false) - __SYCL_BINOP(/, /=, false) - - // TODO: The following OPs are available only when: DataT != cl_float && - // DataT != cl_double && DataT != cl_half - __SYCL_BINOP(%, %=, false) - __SYCL_BINOP(|, |=, false) - __SYCL_BINOP(&, &=, false) - __SYCL_BINOP(^, ^=, false) - __SYCL_BINOP(>>, >>=, false) - __SYCL_BINOP(<<, <<=, true) + // std::byte is not an arithmetic type. + __SYCL_BINOP(+, +=, true, (!detail::is_byte_v)) + __SYCL_BINOP(-, -=, true, (!detail::is_byte_v)) + __SYCL_BINOP(*, *=, false, (!detail::is_byte_v)) + __SYCL_BINOP(/, /=, false, (!detail::is_byte_v)) + + // The following OPs are available only when: DataT != cl_float && + // DataT != cl_double && DataT != cl_half && DataT != BF16. + __SYCL_BINOP(%, %=, false, + (!detail::is_vgenfloat_v && (!detail::is_byte_v))) + // Bitwise operations are allowed for std::byte. + __SYCL_BINOP(|, |=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(&, &=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(^, ^=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(>>, >>=, false, + (!detail::is_vgenfloat_v && (!detail::is_byte_v))) + __SYCL_BINOP(<<, <<=, true, + (!detail::is_vgenfloat_v && (!detail::is_byte_v))) + +#undef BINOP_BASE #undef __SYCL_BINOP -#undef __SYCL_BINOP_HELP + + // Special <<, >> operators for std::byte. + // std::byte is not an arithmetic type and it only supports the following + // overloads of >> and << operators. + // + // 1 template + // constexpr std::byte operator<<( std::byte b, IntegerType shift ) + // noexcept; + // 2 template + // constexpr std::byte operator>>( std::byte b, IntegerType shift ) + // noexcept; +#define __SYCL_SHIFT_BYTE(OP, OPASSIGN) \ + template \ + friend typename std::enable_if_t<(detail::is_byte_v), vec> operator OP( \ + const vec & Lhs, int shift) { \ + vec Ret; \ + for (size_t I = 0; I < NumElements; ++I) { \ + Ret[I] = Lhs[I] OP shift; \ + } \ + return Ret; \ + } \ + template \ + friend typename std::enable_if_t<(detail::is_byte_v), vec &> \ + operator OPASSIGN(vec & Lhs, int shift) { \ + Lhs = Lhs OP shift; \ + return Lhs; \ + } + + __SYCL_SHIFT_BYTE(<<, <<=) + __SYCL_SHIFT_BYTE(>>, >>=) +#undef __SYCL_SHIFT_BYTE // Note: vec<>/SwizzleOp logical value is 0/-1 logic, as opposed to 0/1 logic. // As far as CTS validation is concerned, 0/-1 logic also applies when @@ -1075,15 +1114,15 @@ template class vec { // TODO: Determine if vec<, NumElements=1> is needed at all, remove this // inconsistency if not by disallowing one-element vectors (as in OpenCL) -#ifdef __SYCL_RELLOGOP -#error "Undefine __SYCL_RELLOGOP macro" +#if defined(__SYCL_RELLOGOP) || defined(RELLOGOP_BASE) +#error "Undefine __SYCL_RELLOGOP and RELLOGOP_BASE macro." #endif -// Use __SYCL_DEVICE_ONLY__ macro because cast to OpenCL vector type is defined -// by SYCL device compiler only. + #ifdef __SYCL_DEVICE_ONLY__ -#define __SYCL_RELLOGOP(RELLOGOP) \ - friend vec operator RELLOGOP(const vec & Lhs, \ - const vec & Rhs) { \ +#define RELLOGOP_BASE(RELLOGOP, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec> \ + operator RELLOGOP(const vec & Lhs, const vec & Rhs) { \ vec Ret{}; \ /* This special case is needed since there are no standard operator|| */ \ /* or operator&& functions for std::array. */ \ @@ -1104,20 +1143,12 @@ template class vec { Ret *= -1; \ } \ return Ret; \ - } \ - friend vec operator RELLOGOP(const vec & Lhs, \ - const DataT & Rhs) { \ - return Lhs RELLOGOP vec(Rhs); \ - } \ - friend vec operator RELLOGOP(const DataT & Lhs, \ - const vec & Rhs) { \ - return vec(Lhs) RELLOGOP Rhs; \ } - -#else -#define __SYCL_RELLOGOP(RELLOGOP) \ - friend vec operator RELLOGOP(const vec & Lhs, \ - const vec & Rhs) { \ +#else // __SYCL_DEVICE_ONLY__ +#define RELLOGOP_BASE(RELLOGOP, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec> \ + operator RELLOGOP(const vec & Lhs, const vec & Rhs) { \ vec Ret{}; \ for (size_t I = 0; I < NumElements; ++I) { \ /* We cannot use SetValue here as the operator is not a friend of*/ \ @@ -1126,49 +1157,64 @@ template class vec { Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ } \ return Ret; \ - } \ - friend vec operator RELLOGOP(const vec & Lhs, \ - const DataT & Rhs) { \ + } +#endif + +#define __SYCL_RELLOGOP(RELLOGOP, COND) \ + RELLOGOP_BASE(RELLOGOP, COND) \ + \ + template \ + friend typename std::enable_if_t<(COND), vec> \ + operator RELLOGOP(const vec & Lhs, const DataT & Rhs) { \ return Lhs RELLOGOP vec(Rhs); \ } \ - friend vec operator RELLOGOP(const DataT & Lhs, \ - const vec & Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec> \ + operator RELLOGOP(const DataT & Lhs, const vec & Rhs) { \ return vec(Lhs) RELLOGOP Rhs; \ } -#endif - __SYCL_RELLOGOP(==) - __SYCL_RELLOGOP(!=) - __SYCL_RELLOGOP(>) - __SYCL_RELLOGOP(<) - __SYCL_RELLOGOP(>=) - __SYCL_RELLOGOP(<=) - // TODO: limit to integral types. - __SYCL_RELLOGOP(&&) - __SYCL_RELLOGOP(||) + // OP is: ==, !=, <, >, <=, >=, &&, || + // vec operatorOP(const vec &Rhs) const; + // vec operatorOP(const DataT &Rhs) const; + __SYCL_RELLOGOP(==, (!detail::is_byte_v)) + __SYCL_RELLOGOP(!=, (!detail::is_byte_v)) + __SYCL_RELLOGOP(>, (!detail::is_byte_v)) + __SYCL_RELLOGOP(<, (!detail::is_byte_v)) + __SYCL_RELLOGOP(>=, (!detail::is_byte_v)) + __SYCL_RELLOGOP(<=, (!detail::is_byte_v)) + + // Only available to integral types. + __SYCL_RELLOGOP(&&, (!detail::is_vgenfloat_v) && (!detail::is_byte_v)) + __SYCL_RELLOGOP(||, (!detail::is_vgenfloat_v) && (!detail::is_byte_v)) #undef __SYCL_RELLOGOP +#undef RELLOGOP_BASE #ifdef __SYCL_UOP #error "Undefine __SYCL_UOP macro" #endif -#define __SYCL_UOP(UOP, OPASSIGN) \ - friend vec &operator UOP(vec & Rhs) { \ +#define __SYCL_UOP(UOP, OPASSIGN, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec &> operator UOP(vec & Rhs) { \ Rhs OPASSIGN vec_data::get(1); \ return Rhs; \ } \ - friend vec operator UOP(vec &Lhs, int) { \ + template \ + friend typename std::enable_if_t<(COND), vec> operator UOP(vec & Lhs, int) { \ vec Ret(Lhs); \ Lhs OPASSIGN vec_data::get(1); \ return Ret; \ } - __SYCL_UOP(++, +=) - __SYCL_UOP(--, -=) + __SYCL_UOP(++, +=, (!detail::is_byte_v)) + __SYCL_UOP(--, -=, (!detail::is_byte_v)) #undef __SYCL_UOP // operator~() available only when: dataT != float && dataT != double // && dataT != half - friend vec operator~(const vec &Rhs) { + template + friend typename std::enable_if_t, vec> + operator~(const vec &Rhs) { if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { vec Ret{}; for (size_t I = 0; I < NumElements; ++I) { @@ -1184,8 +1230,11 @@ template class vec { } } - // operator! - friend vec, NumElements> operator!(const vec &Rhs) { + // operator!. Not available for std::byte. + template + friend typename std::enable_if_t<(!detail::is_byte_v), + vec, NumElements>> + operator!(const vec &Rhs) { if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { vec Ret{}; for (size_t I = 0; I < NumElements; ++I) { @@ -1207,8 +1256,10 @@ template class vec { } } - // operator + - friend vec operator+(const vec &Lhs) { + // operator +. Not available for std::byte as it is not an arithmetic type. + template + friend typename std::enable_if_t<(!detail::is_byte_v), vec> + operator+(const vec &Lhs) { if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { vec Ret{}; for (size_t I = 0; I < NumElements; ++I) @@ -1220,8 +1271,10 @@ template class vec { } } - // operator - - friend vec operator-(const vec &Lhs) { + // operator -. Not available for std::byte as it is not an arithmetic type. + template + friend typename std::enable_if_t<(!detail::is_byte_v), vec> + operator-(const vec &Lhs) { namespace oneapi = sycl::ext::oneapi; vec Ret{}; if constexpr (IsBfloat16 && NumElements == 1) { @@ -1661,7 +1714,7 @@ class SwizzleOp { template friend typename std::enable_if_t< - std::is_same_v && std::is_integral_v>, vec_t> + std::is_same_v && !detail::is_vgenfloat_v, vec_t> operator~(const SwizzleOp &Rhs) { vec_t Tmp = Rhs; return ~Tmp; @@ -1688,34 +1741,57 @@ class SwizzleOp { #ifdef __SYCL_BINOP #error "Undefine __SYCL_BINOP macro" #endif -#define __SYCL_BINOP(BINOP) \ - friend vec_t operator BINOP(const DataT &Lhs, const SwizzleOp &Rhs) { \ +#define __SYCL_BINOP(BINOP, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec_t> operator BINOP( \ + const DataT & Lhs, const SwizzleOp & Rhs) { \ vec_t Tmp = Rhs; \ return Lhs BINOP Tmp; \ } \ - friend vec_t operator BINOP(const SwizzleOp &Lhs, const DataT &Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec_t> operator BINOP( \ + const SwizzleOp & Lhs, const DataT & Rhs) { \ vec_t Tmp = Lhs; \ return Tmp BINOP Rhs; \ } \ - friend vec_t operator BINOP(const vec_t &Lhs, const SwizzleOp &Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec_t> operator BINOP( \ + const vec_t & Lhs, const SwizzleOp & Rhs) { \ vec_t Tmp = Rhs; \ return Lhs BINOP Tmp; \ } \ - friend vec_t operator BINOP(const SwizzleOp &Lhs, const vec_t &Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec_t> operator BINOP( \ + const SwizzleOp & Lhs, const vec_t & Rhs) { \ vec_t Tmp = Lhs; \ return Tmp BINOP Rhs; \ } - __SYCL_BINOP(+) - __SYCL_BINOP(-) - __SYCL_BINOP(*) - __SYCL_BINOP(/) - __SYCL_BINOP(%) - __SYCL_BINOP(&) - __SYCL_BINOP(|) - __SYCL_BINOP(^) - __SYCL_BINOP(>>) - __SYCL_BINOP(<<) + __SYCL_BINOP(+, (!detail::is_byte_v)) + __SYCL_BINOP(-, (!detail::is_byte_v)) + __SYCL_BINOP(*, (!detail::is_byte_v)) + __SYCL_BINOP(/, (!detail::is_byte_v)) + __SYCL_BINOP(%, (!detail::is_byte_v)) + __SYCL_BINOP(&, true) + __SYCL_BINOP(|, true) + __SYCL_BINOP(^, true) + // We have special <<, >> operators for std::byte. + __SYCL_BINOP(>>, (!detail::is_byte_v)) + __SYCL_BINOP(<<, (!detail::is_byte_v)) + + template + friend typename std::enable_if_t, vec_t> + operator>>(const SwizzleOp &Lhs, const int shift) { + vec_t Tmp = Lhs; + return Tmp >> shift; + } + + template + friend typename std::enable_if_t, vec_t> + operator<<(const SwizzleOp &Lhs, const int shift) { + vec_t Tmp = Lhs; + return Tmp << shift; + } #undef __SYCL_BINOP // scalar RELLOGOP vec<> @@ -1724,33 +1800,40 @@ class SwizzleOp { #ifdef __SYCL_RELLOGOP #error "Undefine __SYCL_RELLOGOP macro" #endif -#define __SYCL_RELLOGOP(RELLOGOP) \ - friend vec_rel_t operator RELLOGOP(const DataT &Lhs, const SwizzleOp &Rhs) { \ +#define __SYCL_RELLOGOP(RELLOGOP, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ + const DataT & Lhs, const SwizzleOp & Rhs) { \ vec_t Tmp = Rhs; \ return Lhs RELLOGOP Tmp; \ } \ - friend vec_rel_t operator RELLOGOP(const SwizzleOp &Lhs, const DataT &Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ + const SwizzleOp & Lhs, const DataT & Rhs) { \ vec_t Tmp = Lhs; \ return Tmp RELLOGOP Rhs; \ } \ - friend vec_rel_t operator RELLOGOP(const vec_t &Lhs, const SwizzleOp &Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ + const vec_t & Lhs, const SwizzleOp & Rhs) { \ vec_t Tmp = Rhs; \ return Lhs RELLOGOP Tmp; \ } \ - friend vec_rel_t operator RELLOGOP(const SwizzleOp &Lhs, const vec_t &Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ + const SwizzleOp & Lhs, const vec_t & Rhs) { \ vec_t Tmp = Lhs; \ return Tmp RELLOGOP Rhs; \ } - __SYCL_RELLOGOP(==) - __SYCL_RELLOGOP(!=) - __SYCL_RELLOGOP(>) - __SYCL_RELLOGOP(<) - __SYCL_RELLOGOP(>=) - __SYCL_RELLOGOP(<=) - // TODO: limit to integral types. - __SYCL_RELLOGOP(&&) - __SYCL_RELLOGOP(||) + __SYCL_RELLOGOP(==, (!detail::is_byte_v)) + __SYCL_RELLOGOP(!=, (!detail::is_byte_v)) + __SYCL_RELLOGOP(>, (!detail::is_byte_v)) + __SYCL_RELLOGOP(<, (!detail::is_byte_v)) + __SYCL_RELLOGOP(>=, (!detail::is_byte_v)) + __SYCL_RELLOGOP(<=, (!detail::is_byte_v)) + __SYCL_RELLOGOP(&&, (!detail::is_byte_v && !detail::is_vgenfloat_v)) + __SYCL_RELLOGOP(||, (!detail::is_byte_v && !detail::is_vgenfloat_v)) #undef __SYCL_RELLOGOP template should not support artithmetic operations. In the // new implementation of vec<> class, the following will be removed. +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES { // binary op for 2 vec auto vop = VecByte3A + VecByte3B; @@ -347,6 +348,32 @@ int main() { auto bitv2 = !VecByte4A; } +#else + { + // std::byte is not an arithmetic type and it only supports the following + // overloads of >> and << operators. + // + // 1 template + // constexpr std::byte operator<<( std::byte b, IntegerType shift ) + // noexcept; + // 2 template + // constexpr std::byte operator>>( std::byte b, IntegerType shift ) + // noexcept; + auto VecByte3Shift = VecByte3A << 3; + assert(VecByte3Shift[0] == VecByte3A[0] << 3 && + VecByte3Shift[1] == VecByte3A[1] << 3 && + VecByte3Shift[2] == VecByte3A[2] << 3); + + VecByte3Shift = VecByte3A >> 1; + assert(VecByte3Shift[0] == VecByte3A[0] >> 1 && + VecByte3Shift[1] == VecByte3A[1] >> 1 && + VecByte3Shift[2] == VecByte3A[2] >> 1); + + auto SwizByte3Shift = VecByte4A.lo(); + SwizByte3Shift >> 3; + SwizByte3Shift << 3; + } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES } return 0; From b75b835fa9a755c12085893897080af08edcee91 Mon Sep 17 00:00:00 2001 From: Udit Agarwal Date: Thu, 30 May 2024 08:12:39 -0700 Subject: [PATCH 04/18] Update byte.cpp --- sycl/test-e2e/Basic/vector/byte.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Basic/vector/byte.cpp b/sycl/test-e2e/Basic/vector/byte.cpp index 96b4f89cc5bf0..a9bc7b8dee1f7 100644 --- a/sycl/test-e2e/Basic/vector/byte.cpp +++ b/sycl/test-e2e/Basic/vector/byte.cpp @@ -90,7 +90,7 @@ int main() { .wait(); } assert(std_vec[0] == std::byte{2}); - assert(std_vec[1] == std::byte{2}); + assert(std_vec[1] == std::byte{7}); // swizzle { From e5fe42135cf28e7ad4334b77fbab8c5c2fe46095 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 30 May 2024 08:40:23 -0700 Subject: [PATCH 05/18] Add asserts for byte.cpp E2E --- sycl/test-e2e/Basic/vector/byte.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/Basic/vector/byte.cpp b/sycl/test-e2e/Basic/vector/byte.cpp index a9bc7b8dee1f7..e9f53d5909b9f 100644 --- a/sycl/test-e2e/Basic/vector/byte.cpp +++ b/sycl/test-e2e/Basic/vector/byte.cpp @@ -374,9 +374,12 @@ int main() { VecByte3Shift[1] == VecByte3A[1] >> 1 && VecByte3Shift[2] == VecByte3A[2] >> 1); - auto SwizByte3Shift = VecByte4A.lo(); - SwizByte3Shift >> 3; - SwizByte3Shift << 3; + auto SwizByte2Shift = VecByte4A.lo(); + using VecType = sycl::vec; + auto SwizShiftRight = (VecType) (SwizByte2Shift >> 3); + auto SwizShiftLeft = (VecType) (SwizByte2Shift << 3); + assert(SwizShiftRight[0] == SwizByte2Shift[0] >> 3 && + SwizShiftLeft[1] == SwizByte2Shift[1] << 3); } #endif // __INTEL_PREVIEW_BREAKING_CHANGES } From fd270100e12359156f0f712545d55f0d027dec90 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 30 May 2024 16:50:44 -0700 Subject: [PATCH 06/18] Address reviews; Fix formatting --- .../sycl/detail/generic_type_traits.hpp | 4 +- sycl/include/sycl/vector_preview.hpp | 84 +++++++++---------- sycl/test-e2e/Basic/vector/byte.cpp | 4 +- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/sycl/include/sycl/detail/generic_type_traits.hpp b/sycl/include/sycl/detail/generic_type_traits.hpp index 57f571e1a842f..a58493877c3c4 100644 --- a/sycl/include/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/sycl/detail/generic_type_traits.hpp @@ -253,14 +253,14 @@ inline constexpr bool is_genfloatptr_marray_v = IsDecorated == access::decorated::no); template -using is_byte_t = typename +using is_byte = typename #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) std::is_same; #else std::false_type; #endif -template inline constexpr bool is_byte_v = is_byte_t::value; +template inline constexpr bool is_byte_v = is_byte::value; template using make_floating_point_t = make_type_t; diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index 8c1660ec1d338..436e10d697d08 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -996,8 +996,8 @@ template class vec { #ifdef __SYCL_DEVICE_ONLY__ #define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ template \ - friend typename std::enable_if_t<(COND), vec> operator BINOP( \ - const vec & Lhs, const vec & Rhs) { \ + friend std::enable_if_t<(COND), vec> operator BINOP(const vec & Lhs, \ + const vec & Rhs) { \ vec Ret; \ if constexpr (IsUsingArrayOnDevice) { \ for (size_t I = 0; I < NumElements; ++I) { \ @@ -1015,8 +1015,8 @@ template class vec { #define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ template \ - friend typename std::enable_if_t<(COND), vec> operator BINOP( \ - const vec & Lhs, const vec & Rhs) { \ + friend std::enable_if_t<(COND), vec> operator BINOP(const vec & Lhs, \ + const vec & Rhs) { \ vec Ret{}; \ if constexpr (NativeVec) \ Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ @@ -1032,24 +1032,24 @@ template class vec { BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ \ template \ - friend typename std::enable_if_t<(COND), vec> operator BINOP( \ - const vec & Lhs, const DataT & Rhs) { \ + friend std::enable_if_t<(COND), vec> operator BINOP(const vec & Lhs, \ + const DataT & Rhs) { \ return Lhs BINOP vec(Rhs); \ } \ template \ - friend typename std::enable_if_t<(COND), vec> operator BINOP( \ - const DataT & Lhs, const vec & Rhs) { \ + friend std::enable_if_t<(COND), vec> operator BINOP(const DataT & Lhs, \ + const vec & Rhs) { \ return vec(Lhs) BINOP Rhs; \ } \ template \ - friend typename std::enable_if_t<(COND), vec> &operator OPASSIGN( \ - vec & Lhs, const vec & Rhs) { \ + friend std::enable_if_t<(COND), vec> &operator OPASSIGN(vec & Lhs, \ + const vec & Rhs) { \ Lhs = Lhs BINOP Rhs; \ return Lhs; \ } \ template \ - friend typename std::enable_if_t<(Num != 1) && (COND), vec &> \ - operator OPASSIGN(vec & Lhs, const DataT & Rhs) { \ + friend std::enable_if_t<(Num != 1) && (COND), vec &> operator OPASSIGN( \ + vec & Lhs, const DataT & Rhs) { \ Lhs = Lhs BINOP vec(Rhs); \ return Lhs; \ } @@ -1088,7 +1088,7 @@ template class vec { // noexcept; #define __SYCL_SHIFT_BYTE(OP, OPASSIGN) \ template \ - friend typename std::enable_if_t<(detail::is_byte_v), vec> operator OP( \ + friend std::enable_if_t<(detail::is_byte_v), vec> operator OP( \ const vec & Lhs, int shift) { \ vec Ret; \ for (size_t I = 0; I < NumElements; ++I) { \ @@ -1097,8 +1097,8 @@ template class vec { return Ret; \ } \ template \ - friend typename std::enable_if_t<(detail::is_byte_v), vec &> \ - operator OPASSIGN(vec & Lhs, int shift) { \ + friend std::enable_if_t<(detail::is_byte_v), vec &> operator OPASSIGN( \ + vec & Lhs, int shift) { \ Lhs = Lhs OP shift; \ return Lhs; \ } @@ -1121,8 +1121,8 @@ template class vec { #ifdef __SYCL_DEVICE_ONLY__ #define RELLOGOP_BASE(RELLOGOP, COND) \ template \ - friend typename std::enable_if_t<(COND), vec> \ - operator RELLOGOP(const vec & Lhs, const vec & Rhs) { \ + friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ + const vec & Lhs, const vec & Rhs) { \ vec Ret{}; \ /* This special case is needed since there are no standard operator|| */ \ /* or operator&& functions for std::array. */ \ @@ -1147,8 +1147,8 @@ template class vec { #else // __SYCL_DEVICE_ONLY__ #define RELLOGOP_BASE(RELLOGOP, COND) \ template \ - friend typename std::enable_if_t<(COND), vec> \ - operator RELLOGOP(const vec & Lhs, const vec & Rhs) { \ + friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ + const vec & Lhs, const vec & Rhs) { \ vec Ret{}; \ for (size_t I = 0; I < NumElements; ++I) { \ /* We cannot use SetValue here as the operator is not a friend of*/ \ @@ -1164,13 +1164,13 @@ template class vec { RELLOGOP_BASE(RELLOGOP, COND) \ \ template \ - friend typename std::enable_if_t<(COND), vec> \ - operator RELLOGOP(const vec & Lhs, const DataT & Rhs) { \ + friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ + const vec & Lhs, const DataT & Rhs) { \ return Lhs RELLOGOP vec(Rhs); \ } \ template \ - friend typename std::enable_if_t<(COND), vec> \ - operator RELLOGOP(const DataT & Lhs, const vec & Rhs) { \ + friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ + const DataT & Lhs, const vec & Rhs) { \ return vec(Lhs) RELLOGOP Rhs; \ } @@ -1195,12 +1195,12 @@ template class vec { #endif #define __SYCL_UOP(UOP, OPASSIGN, COND) \ template \ - friend typename std::enable_if_t<(COND), vec &> operator UOP(vec & Rhs) { \ + friend std::enable_if_t<(COND), vec &> operator UOP(vec & Rhs) { \ Rhs OPASSIGN vec_data::get(1); \ return Rhs; \ } \ template \ - friend typename std::enable_if_t<(COND), vec> operator UOP(vec & Lhs, int) { \ + friend std::enable_if_t<(COND), vec> operator UOP(vec & Lhs, int) { \ vec Ret(Lhs); \ Lhs OPASSIGN vec_data::get(1); \ return Ret; \ @@ -1213,7 +1213,7 @@ template class vec { // operator~() available only when: dataT != float && dataT != double // && dataT != half template - friend typename std::enable_if_t, vec> + friend std::enable_if_t, vec> operator~(const vec &Rhs) { if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { vec Ret{}; @@ -1232,8 +1232,8 @@ template class vec { // operator!. Not available for std::byte. template - friend typename std::enable_if_t<(!detail::is_byte_v), - vec, NumElements>> + friend std::enable_if_t<(!detail::is_byte_v), + vec, NumElements>> operator!(const vec &Rhs) { if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { vec Ret{}; @@ -1258,7 +1258,7 @@ template class vec { // operator +. Not available for std::byte as it is not an arithmetic type. template - friend typename std::enable_if_t<(!detail::is_byte_v), vec> + friend std::enable_if_t<(!detail::is_byte_v), vec> operator+(const vec &Lhs) { if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { vec Ret{}; @@ -1273,7 +1273,7 @@ template class vec { // operator -. Not available for std::byte as it is not an arithmetic type. template - friend typename std::enable_if_t<(!detail::is_byte_v), vec> + friend std::enable_if_t<(!detail::is_byte_v), vec> operator-(const vec &Lhs) { namespace oneapi = sycl::ext::oneapi; vec Ret{}; @@ -1743,26 +1743,26 @@ class SwizzleOp { #endif #define __SYCL_BINOP(BINOP, COND) \ template \ - friend typename std::enable_if_t<(COND), vec_t> operator BINOP( \ + friend std::enable_if_t<(COND), vec_t> operator BINOP( \ const DataT & Lhs, const SwizzleOp & Rhs) { \ vec_t Tmp = Rhs; \ return Lhs BINOP Tmp; \ } \ template \ - friend typename std::enable_if_t<(COND), vec_t> operator BINOP( \ - const SwizzleOp & Lhs, const DataT & Rhs) { \ + friend std::enable_if_t<(COND), vec_t> operator BINOP(const SwizzleOp & Lhs, \ + const DataT & Rhs) { \ vec_t Tmp = Lhs; \ return Tmp BINOP Rhs; \ } \ template \ - friend typename std::enable_if_t<(COND), vec_t> operator BINOP( \ + friend std::enable_if_t<(COND), vec_t> operator BINOP( \ const vec_t & Lhs, const SwizzleOp & Rhs) { \ vec_t Tmp = Rhs; \ return Lhs BINOP Tmp; \ } \ template \ - friend typename std::enable_if_t<(COND), vec_t> operator BINOP( \ - const SwizzleOp & Lhs, const vec_t & Rhs) { \ + friend std::enable_if_t<(COND), vec_t> operator BINOP(const SwizzleOp & Lhs, \ + const vec_t & Rhs) { \ vec_t Tmp = Lhs; \ return Tmp BINOP Rhs; \ } @@ -1780,14 +1780,14 @@ class SwizzleOp { __SYCL_BINOP(<<, (!detail::is_byte_v)) template - friend typename std::enable_if_t, vec_t> + friend std::enable_if_t, vec_t> operator>>(const SwizzleOp &Lhs, const int shift) { vec_t Tmp = Lhs; return Tmp >> shift; } template - friend typename std::enable_if_t, vec_t> + friend std::enable_if_t, vec_t> operator<<(const SwizzleOp &Lhs, const int shift) { vec_t Tmp = Lhs; return Tmp << shift; @@ -1802,25 +1802,25 @@ class SwizzleOp { #endif #define __SYCL_RELLOGOP(RELLOGOP, COND) \ template \ - friend typename std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ + friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ const DataT & Lhs, const SwizzleOp & Rhs) { \ vec_t Tmp = Rhs; \ return Lhs RELLOGOP Tmp; \ } \ template \ - friend typename std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ + friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ const SwizzleOp & Lhs, const DataT & Rhs) { \ vec_t Tmp = Lhs; \ return Tmp RELLOGOP Rhs; \ } \ template \ - friend typename std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ + friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ const vec_t & Lhs, const SwizzleOp & Rhs) { \ vec_t Tmp = Rhs; \ return Lhs RELLOGOP Tmp; \ } \ template \ - friend typename std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ + friend std::enable_if_t<(COND), vec_rel_t> operator RELLOGOP( \ const SwizzleOp & Lhs, const vec_t & Rhs) { \ vec_t Tmp = Lhs; \ return Tmp RELLOGOP Rhs; \ diff --git a/sycl/test-e2e/Basic/vector/byte.cpp b/sycl/test-e2e/Basic/vector/byte.cpp index e9f53d5909b9f..3d1c372f79837 100644 --- a/sycl/test-e2e/Basic/vector/byte.cpp +++ b/sycl/test-e2e/Basic/vector/byte.cpp @@ -376,8 +376,8 @@ int main() { auto SwizByte2Shift = VecByte4A.lo(); using VecType = sycl::vec; - auto SwizShiftRight = (VecType) (SwizByte2Shift >> 3); - auto SwizShiftLeft = (VecType) (SwizByte2Shift << 3); + auto SwizShiftRight = (VecType)(SwizByte2Shift >> 3); + auto SwizShiftLeft = (VecType)(SwizByte2Shift << 3); assert(SwizShiftRight[0] == SwizByte2Shift[0] >> 3 && SwizShiftLeft[1] == SwizByte2Shift[1] << 3); } From 23475a03062da274873d9d43c6f64cdd84f68b4f Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Mon, 3 Jun 2024 23:33:17 -0700 Subject: [PATCH 07/18] Seperate out math operators in a class. --- sycl/include/sycl/detail/vector_arith.hpp | 388 ++++++++++++++++++++++ sycl/include/sycl/vector_preview.hpp | 348 +------------------ 2 files changed, 396 insertions(+), 340 deletions(-) create mode 100644 sycl/include/sycl/detail/vector_arith.hpp diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp new file mode 100644 index 0000000000000..d9dc881be122c --- /dev/null +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -0,0 +1,388 @@ +//=== vector_arith.hpp --- Implementation of arithmetic ops on sycl::vec ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include // for half, cl_char, cl_int +#include // for is_sigeninteger, is_s... +#include // for is_contained +#include // for is_floating_point + +#include // bfloat16 + +#include +#include // for enable_if_t, is_same + +namespace sycl { +inline namespace _V1 { + +template class vec; + +namespace detail { + +// Element type for relational operator return value. +template +using rel_t = typename std::conditional_t< + sizeof(DataT) == sizeof(opencl::cl_char), opencl::cl_char, + typename std::conditional_t< + sizeof(DataT) == sizeof(opencl::cl_short), opencl::cl_short, + typename std::conditional_t< + sizeof(DataT) == sizeof(opencl::cl_int), opencl::cl_int, + typename std::conditional_t>>>; + +// Macros to populate binary operation on sycl::vec. +#if defined(__SYCL_BINOP) || defined(BINOP_BASE) +#error "Undefine __SYCL_BINOP and BINOP_BASE macro" +#endif + +#ifdef __SYCL_DEVICE_ONLY__ +#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + template \ + friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ + const vec_t & Rhs) { \ + vec_t Ret; \ + if constexpr (vec_t::IsUsingArrayOnDevice) { \ + for (size_t I = 0; I < NumElements; ++I) { \ + Ret.setValue(I, (Lhs.getValue(I) BINOP Rhs.getValue(I))); \ + } \ + } else { \ + Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ + if constexpr (std::is_same_v && CONVERT) { \ + Ret.ConvertToDataT(); \ + } \ + } \ + return Ret; \ + } +#else // __SYCL_DEVICE_ONLY__ + +#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + template \ + friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ + const vec_t & Rhs) { \ + vec_t Ret{}; \ + for (size_t I = 0; I < NumElements; ++I) \ + Ret.setValue(I, (DataT)(vec_data::get(Lhs.getValue( \ + I)) BINOP vec_data::get(Rhs.getValue(I)))); \ + return Ret; \ + } +#endif // __SYCL_DEVICE_ONLY__ + +#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT, COND) \ + BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + \ + template \ + friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ + const DataT & Rhs) { \ + return Lhs BINOP vec_t(Rhs); \ + } \ + template \ + friend std::enable_if_t<(COND), vec_t> operator BINOP(const DataT & Lhs, \ + const vec_t & Rhs) { \ + return vec_t(Lhs) BINOP Rhs; \ + } \ + template \ + friend std::enable_if_t<(COND), vec_t> &operator OPASSIGN(vec_t & Lhs, \ + const vec_t & Rhs) { \ + Lhs = Lhs BINOP Rhs; \ + return Lhs; \ + } \ + template \ + friend std::enable_if_t<(Num != 1) && (COND), vec_t &> operator OPASSIGN( \ + vec_t & Lhs, const DataT & Rhs) { \ + Lhs = Lhs BINOP vec_t(Rhs); \ + return Lhs; \ + } + +/**************************************************************** + * vec_arith_common + * / | \ + * / | \ + * vec_arith vec_arith ... vec_arith + * \ | / + * \ | / + * sycl::vec + * + * vec_arith_common is the base class for vec_arith. It contains + * the common math operators of sycl::vec for all types. + * vec_arith is the derived class that contains the math operators + * specialized for certain types. sycl::vec inherits from vec_arith. + * *************************************************************/ +template class vec_arith_common; +template struct vec_helper; + +template +class vec_arith : public vec_arith_common { +public: + using vec_t = vec; + using ocl_t = rel_t; + template using vec_data = vec_helper; + + // operator!. + friend vec, NumElements> + operator!(const vec_t &Rhs) { + if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { + vec_t Ret{}; + for (size_t I = 0; I < NumElements; ++I) { + Ret.setValue(I, !vec_data::get(Rhs.getValue(I))); + } + return Ret.template as, NumElements>>(); + } else { + return vec_t{(typename vec::DataType) !Rhs.m_Data} + .template as, NumElements>>(); + } + } + + // operator +. + friend vec_t operator+(const vec_t &Lhs) { + if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { + vec_t Ret{}; + for (size_t I = 0; I < NumElements; ++I) + Ret.setValue( + I, vec_data::get(+vec_data::get(Lhs.getValue(I)))); + return Ret; + } else { + return vec_t{+Lhs.m_Data}; + } + } + + // operator -. + friend vec_t operator-(const vec_t &Lhs) { + namespace oneapi = sycl::ext::oneapi; + vec_t Ret{}; + if constexpr (vec_t::IsBfloat16 && NumElements == 1) { + oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data); + oneapi::bfloat16 w = -v; + Ret.m_Data = oneapi::detail::bfloat16ToBits(w); + } else if constexpr (vec_t::IsBfloat16) { + for (size_t I = 0; I < NumElements; I++) { + oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data[I]); + oneapi::bfloat16 w = -v; + Ret.m_Data[I] = oneapi::detail::bfloat16ToBits(w); + } + } else if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { + for (size_t I = 0; I < NumElements; ++I) + Ret.setValue( + I, vec_data::get(-vec_data::get(Lhs.getValue(I)))); + return Ret; + } else { + Ret = vec_t{-Lhs.m_Data}; + if constexpr (std::is_same_v) { + Ret.ConvertToDataT(); + } + return Ret; + } + } + +// Unary operations on sycl::vec +#ifdef __SYCL_UOP +#error "Undefine __SYCL_UOP macro" +#endif +#define __SYCL_UOP(UOP, OPASSIGN) \ + friend vec_t & operator UOP(vec_t & Rhs) { \ + Rhs OPASSIGN vec_data::get(1); \ + return Rhs; \ + } \ + friend vec_t operator UOP(vec_t & Lhs, int) { \ + vec_t Ret(Lhs); \ + Lhs OPASSIGN vec_data::get(1); \ + return Ret; \ + } + + __SYCL_UOP(++, +=) + __SYCL_UOP(--, -=) +#undef __SYCL_UOP + + // The logical operations on scalar types results in 0/1, while for vec<>, + // logical operations should result in 0 and -1 (similar to OpenCL vectors). + // That's why, for vec, we need to invert the result of the logical + // operations since we store vec as scalar type on the device. +#if defined(__SYCL_RELLOGOP) || defined(RELLOGOP_BASE) +#error "Undefine __SYCL_RELLOGOP and RELLOGOP_BASE macro." +#endif + +#ifdef __SYCL_DEVICE_ONLY__ +#define RELLOGOP_BASE(RELLOGOP, COND) \ + template \ + friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ + const vec_t & Lhs, const vec_t & Rhs) { \ + vec Ret{}; \ + /* This special case is needed since there are no standard operator|| */ \ + /* or operator&& functions for std::array. */ \ + if constexpr (vec_t::IsUsingArrayOnDevice && \ + (std::string_view(#RELLOGOP) == "||" || \ + std::string_view(#RELLOGOP) == "&&")) { \ + for (size_t I = 0; I < NumElements; ++I) { \ + /* We cannot use SetValue here as the operator is not a friend of*/ \ + /* Ret on Windows. */ \ + Ret[I] = static_cast(-(vec_data::get( \ + Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ + } \ + } else { \ + Ret = vec( \ + (typename vec::vector_t)( \ + Lhs.m_Data RELLOGOP Rhs.m_Data)); \ + if (NumElements == 1) /*Scalar 0/1 logic was applied, invert*/ \ + Ret *= -1; \ + } \ + return Ret; \ + } +#else // __SYCL_DEVICE_ONLY__ +#define RELLOGOP_BASE(RELLOGOP, COND) \ + template \ + friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ + const vec_t & Lhs, const vec_t & Rhs) { \ + vec Ret{}; \ + for (size_t I = 0; I < NumElements; ++I) { \ + /* We cannot use SetValue here as the operator is not a friend of*/ \ + /* Ret on Windows. */ \ + Ret[I] = static_cast(-(vec_data::get( \ + Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ + } \ + return Ret; \ + } +#endif + +#define __SYCL_RELLOGOP(RELLOGOP, COND) \ + RELLOGOP_BASE(RELLOGOP, COND) \ + \ + template \ + friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ + const vec_t & Lhs, const DataT & Rhs) { \ + return Lhs RELLOGOP vec_t(Rhs); \ + } \ + template \ + friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ + const DataT & Lhs, const vec_t & Rhs) { \ + return vec_t(Lhs) RELLOGOP Rhs; \ + } + + // OP is: ==, !=, <, >, <=, >=, &&, || + // vec operatorOP(const vec &Rhs) const; + // vec operatorOP(const DataT &Rhs) const; + __SYCL_RELLOGOP(==, true) + __SYCL_RELLOGOP(!=, true) + __SYCL_RELLOGOP(>, true) + __SYCL_RELLOGOP(<, true) + __SYCL_RELLOGOP(>=, true) + __SYCL_RELLOGOP(<=, true) + + // Only available to integral types. + __SYCL_RELLOGOP(&&, (!detail::is_vgenfloat_v)) + __SYCL_RELLOGOP(||, (!detail::is_vgenfloat_v)) +#undef __SYCL_RELLOGOP +#undef RELLOGOP_BASE + + // Binary operations on sycl::vec<> for all types except std::byte. + __SYCL_BINOP(+, +=, true, true) + __SYCL_BINOP(-, -=, true, true) + __SYCL_BINOP(*, *=, false, true) + __SYCL_BINOP(/, /=, false, true) + + // The following OPs are available only when: DataT != cl_float && + // DataT != cl_double && DataT != cl_half && DataT != BF16. + __SYCL_BINOP(%, %=, false, + (!detail::is_vgenfloat_v)) + // Bitwise operations are allowed for std::byte. + __SYCL_BINOP(|, |=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(&, &=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(^, ^=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(>>, >>=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(<<, <<=, true, (!detail::is_vgenfloat_v)) + + // friends + template friend class vec; +}; // class vec_arith<> + +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) +template +class vec_arith: public vec_arith_common { +public: + using DataT = typename std::conditional_t; + using vec_t = vec; + template using vec_data = vec_helper; + + // Special <<, >> operators for std::byte. + // std::byte is not an arithmetic type and it only supports the following + // overloads of >> and << operators. + // + // 1 template + // constexpr std::byte operator<<( std::byte b, IntegerType shift ) + // noexcept; + friend vec_t operator <<(const vec_t & Lhs, int shift) { + vec_t Ret; + for (size_t I = 0; I < NumElements; ++I) { + Ret[I] = Lhs[I] << shift; + } + return Ret; + } + friend vec_t & operator <<(vec_t & Lhs, int shift) { + Lhs = Lhs << shift; + return Lhs; + } + + // 2 template + // constexpr std::byte operator>>( std::byte b, IntegerType shift ) + // noexcept; + friend vec_t operator >>(const vec_t & Lhs, int shift) { + vec_t Ret; + for (size_t I = 0; I < NumElements; ++I) { + Ret[I] = Lhs[I] >> shift; + } + return Ret; + } + friend vec_t & operator >>(vec_t & Lhs, int shift) { + Lhs = Lhs >> shift; + return Lhs; + } + + __SYCL_BINOP(|, |=, false, true) + __SYCL_BINOP(&, &=, false, true) + __SYCL_BINOP(^, ^=, false, true) + +// friends +template friend class vec; +}; +#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + +template +class vec_arith_common { + public: + using vec_t = vec; + + // operator~() available only when: dataT != float && dataT != double + // && dataT != half + template + friend std::enable_if_t, vec_t> + operator~(const vec_t &Rhs) { + if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { + vec_t Ret{}; + for (size_t I = 0; I < NumElements; ++I) { + Ret.setValue(I, ~Rhs.getValue(I)); + } + return Ret; + } else { + vec_t Ret{(typename vec_t::DataType) ~Rhs.m_Data}; + if constexpr (std::is_same_v) { + Ret.ConvertToDataT(); + } + return Ret; + } + } + + //friends + template friend class vec; +}; + +#undef __SYCL_BINOP +#undef BINOP_BASE + +} // namespace detail +} // namespace _V1 +} // namespace sycl \ No newline at end of file diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index 436e10d697d08..c46643f68d76d 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -39,6 +39,7 @@ #include // for memcpy #include // for is_contained #include // for is_floating_point +#include // for vec_arith_common and vec_arith #include // for convertImpl #include // for vector_alignment #include // for StorageT, half, Vec16... @@ -161,18 +162,6 @@ class SwizzleOp; template struct VecStorage; -// Element type for relational operator return value. -template -using rel_t = typename std::conditional_t< - sizeof(DataT) == sizeof(opencl::cl_char), opencl::cl_char, - typename std::conditional_t< - sizeof(DataT) == sizeof(opencl::cl_short), opencl::cl_short, - typename std::conditional_t< - sizeof(DataT) == sizeof(opencl::cl_int), opencl::cl_int, - typename std::conditional_t>>>; - // Special type indicating that SwizzleOp should just read value from vector - // not trying to perform any operations. Should not be called. template class GetOp { @@ -358,7 +347,8 @@ using vec_data_t = typename detail::vec_helper::RetType; /// SYCL devices as well as in host C++ code. /// /// \ingroup sycl_api -template class vec { +template +class vec : public detail::vec_arith { using DataT = Type; // This represent type of underlying value. There should be only one field @@ -983,331 +973,6 @@ template class vec { } } -#if defined(__SYCL_BINOP) || defined(BINOP_BASE) -#error "Undefine __SYCL_BINOP and BINOP_BASE macro" -#endif - -// Note: vec<>/SwizzleOp logical value is 0/-1 logic, as opposed to 0/1 logic. -// As far as CTS validation is concerned, 0/-1 logic also applies when -// NumElements is equal to one, which is somewhat inconsistent with being -// transparent with scalar data. -// TODO: Determine if vec<, NumElements=1> is needed at all, remove this -// inconsistency if not by disallowing one-element vectors (as in OpenCL) -#ifdef __SYCL_DEVICE_ONLY__ -#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator BINOP(const vec & Lhs, \ - const vec & Rhs) { \ - vec Ret; \ - if constexpr (IsUsingArrayOnDevice) { \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret.setValue(I, (Lhs.getValue(I) BINOP Rhs.getValue(I))); \ - } \ - } else { \ - Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ - if constexpr (std::is_same_v && CONVERT) { \ - Ret.ConvertToDataT(); \ - } \ - } \ - return Ret; \ - } -#else // __SYCL_DEVICE_ONLY__ - -#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator BINOP(const vec & Lhs, \ - const vec & Rhs) { \ - vec Ret{}; \ - if constexpr (NativeVec) \ - Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ - else \ - for (size_t I = 0; I < NumElements; ++I) \ - Ret.setValue(I, (DataT)(vec_data::get(Lhs.getValue( \ - I)) BINOP vec_data::get(Rhs.getValue(I)))); \ - return Ret; \ - } -#endif // __SYCL_DEVICE_ONLY__ - -#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT, COND) \ - BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ - \ - template \ - friend std::enable_if_t<(COND), vec> operator BINOP(const vec & Lhs, \ - const DataT & Rhs) { \ - return Lhs BINOP vec(Rhs); \ - } \ - template \ - friend std::enable_if_t<(COND), vec> operator BINOP(const DataT & Lhs, \ - const vec & Rhs) { \ - return vec(Lhs) BINOP Rhs; \ - } \ - template \ - friend std::enable_if_t<(COND), vec> &operator OPASSIGN(vec & Lhs, \ - const vec & Rhs) { \ - Lhs = Lhs BINOP Rhs; \ - return Lhs; \ - } \ - template \ - friend std::enable_if_t<(Num != 1) && (COND), vec &> operator OPASSIGN( \ - vec & Lhs, const DataT & Rhs) { \ - Lhs = Lhs BINOP vec(Rhs); \ - return Lhs; \ - } - - // std::byte is not an arithmetic type. - __SYCL_BINOP(+, +=, true, (!detail::is_byte_v)) - __SYCL_BINOP(-, -=, true, (!detail::is_byte_v)) - __SYCL_BINOP(*, *=, false, (!detail::is_byte_v)) - __SYCL_BINOP(/, /=, false, (!detail::is_byte_v)) - - // The following OPs are available only when: DataT != cl_float && - // DataT != cl_double && DataT != cl_half && DataT != BF16. - __SYCL_BINOP(%, %=, false, - (!detail::is_vgenfloat_v && (!detail::is_byte_v))) - // Bitwise operations are allowed for std::byte. - __SYCL_BINOP(|, |=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(&, &=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(^, ^=, false, (!detail::is_vgenfloat_v)) - __SYCL_BINOP(>>, >>=, false, - (!detail::is_vgenfloat_v && (!detail::is_byte_v))) - __SYCL_BINOP(<<, <<=, true, - (!detail::is_vgenfloat_v && (!detail::is_byte_v))) - -#undef BINOP_BASE -#undef __SYCL_BINOP - - // Special <<, >> operators for std::byte. - // std::byte is not an arithmetic type and it only supports the following - // overloads of >> and << operators. - // - // 1 template - // constexpr std::byte operator<<( std::byte b, IntegerType shift ) - // noexcept; - // 2 template - // constexpr std::byte operator>>( std::byte b, IntegerType shift ) - // noexcept; -#define __SYCL_SHIFT_BYTE(OP, OPASSIGN) \ - template \ - friend std::enable_if_t<(detail::is_byte_v), vec> operator OP( \ - const vec & Lhs, int shift) { \ - vec Ret; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] OP shift; \ - } \ - return Ret; \ - } \ - template \ - friend std::enable_if_t<(detail::is_byte_v), vec &> operator OPASSIGN( \ - vec & Lhs, int shift) { \ - Lhs = Lhs OP shift; \ - return Lhs; \ - } - - __SYCL_SHIFT_BYTE(<<, <<=) - __SYCL_SHIFT_BYTE(>>, >>=) -#undef __SYCL_SHIFT_BYTE - - // Note: vec<>/SwizzleOp logical value is 0/-1 logic, as opposed to 0/1 logic. - // As far as CTS validation is concerned, 0/-1 logic also applies when - // NumElements is equal to one, which is somewhat inconsistent with being - // transparent with scalar data. - // TODO: Determine if vec<, NumElements=1> is needed at all, remove this - // inconsistency if not by disallowing one-element vectors (as in OpenCL) - -#if defined(__SYCL_RELLOGOP) || defined(RELLOGOP_BASE) -#error "Undefine __SYCL_RELLOGOP and RELLOGOP_BASE macro." -#endif - -#ifdef __SYCL_DEVICE_ONLY__ -#define RELLOGOP_BASE(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec & Lhs, const vec & Rhs) { \ - vec Ret{}; \ - /* This special case is needed since there are no standard operator|| */ \ - /* or operator&& functions for std::array. */ \ - if constexpr (IsUsingArrayOnDevice && \ - (std::string_view(#RELLOGOP) == "||" || \ - std::string_view(#RELLOGOP) == "&&")) { \ - for (size_t I = 0; I < NumElements; ++I) { \ - /* We cannot use SetValue here as the operator is not a friend of*/ \ - /* Ret on Windows. */ \ - Ret[I] = static_cast(-(vec_data::get( \ - Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ - } \ - } else { \ - Ret = vec( \ - (typename vec::vector_t)( \ - Lhs.m_Data RELLOGOP Rhs.m_Data)); \ - if (NumElements == 1) /*Scalar 0/1 logic was applied, invert*/ \ - Ret *= -1; \ - } \ - return Ret; \ - } -#else // __SYCL_DEVICE_ONLY__ -#define RELLOGOP_BASE(RELLOGOP, COND) \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec & Lhs, const vec & Rhs) { \ - vec Ret{}; \ - for (size_t I = 0; I < NumElements; ++I) { \ - /* We cannot use SetValue here as the operator is not a friend of*/ \ - /* Ret on Windows. */ \ - Ret[I] = static_cast(-(vec_data::get( \ - Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ - } \ - return Ret; \ - } -#endif - -#define __SYCL_RELLOGOP(RELLOGOP, COND) \ - RELLOGOP_BASE(RELLOGOP, COND) \ - \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const vec & Lhs, const DataT & Rhs) { \ - return Lhs RELLOGOP vec(Rhs); \ - } \ - template \ - friend std::enable_if_t<(COND), vec> operator RELLOGOP( \ - const DataT & Lhs, const vec & Rhs) { \ - return vec(Lhs) RELLOGOP Rhs; \ - } - - // OP is: ==, !=, <, >, <=, >=, &&, || - // vec operatorOP(const vec &Rhs) const; - // vec operatorOP(const DataT &Rhs) const; - __SYCL_RELLOGOP(==, (!detail::is_byte_v)) - __SYCL_RELLOGOP(!=, (!detail::is_byte_v)) - __SYCL_RELLOGOP(>, (!detail::is_byte_v)) - __SYCL_RELLOGOP(<, (!detail::is_byte_v)) - __SYCL_RELLOGOP(>=, (!detail::is_byte_v)) - __SYCL_RELLOGOP(<=, (!detail::is_byte_v)) - - // Only available to integral types. - __SYCL_RELLOGOP(&&, (!detail::is_vgenfloat_v) && (!detail::is_byte_v)) - __SYCL_RELLOGOP(||, (!detail::is_vgenfloat_v) && (!detail::is_byte_v)) -#undef __SYCL_RELLOGOP -#undef RELLOGOP_BASE - -#ifdef __SYCL_UOP -#error "Undefine __SYCL_UOP macro" -#endif -#define __SYCL_UOP(UOP, OPASSIGN, COND) \ - template \ - friend std::enable_if_t<(COND), vec &> operator UOP(vec & Rhs) { \ - Rhs OPASSIGN vec_data::get(1); \ - return Rhs; \ - } \ - template \ - friend std::enable_if_t<(COND), vec> operator UOP(vec & Lhs, int) { \ - vec Ret(Lhs); \ - Lhs OPASSIGN vec_data::get(1); \ - return Ret; \ - } - - __SYCL_UOP(++, +=, (!detail::is_byte_v)) - __SYCL_UOP(--, -=, (!detail::is_byte_v)) -#undef __SYCL_UOP - - // operator~() available only when: dataT != float && dataT != double - // && dataT != half - template - friend std::enable_if_t, vec> - operator~(const vec &Rhs) { - if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { - vec Ret{}; - for (size_t I = 0; I < NumElements; ++I) { - Ret.setValue(I, ~Rhs.getValue(I)); - } - return Ret; - } else { - vec Ret{(typename vec::DataType) ~Rhs.m_Data}; - if constexpr (std::is_same_v) { - Ret.ConvertToDataT(); - } - return Ret; - } - } - - // operator!. Not available for std::byte. - template - friend std::enable_if_t<(!detail::is_byte_v), - vec, NumElements>> - operator!(const vec &Rhs) { - if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { - vec Ret{}; - for (size_t I = 0; I < NumElements; ++I) { -#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - // std::byte neither supports ! unary op or casting, so special handling - // is needed. And, worse, Windows has a conflict with 'byte'. - if constexpr (std::is_same_v) { - Ret.setValue(I, std::byte{!vec_data::get(Rhs.getValue(I))}); - } else -#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - { - Ret.setValue(I, !vec_data::get(Rhs.getValue(I))); - } - } - return Ret.template as, NumElements>>(); - } else { - return vec{(typename vec::DataType) !Rhs.m_Data} - .template as, NumElements>>(); - } - } - - // operator +. Not available for std::byte as it is not an arithmetic type. - template - friend std::enable_if_t<(!detail::is_byte_v), vec> - operator+(const vec &Lhs) { - if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { - vec Ret{}; - for (size_t I = 0; I < NumElements; ++I) - Ret.setValue( - I, vec_data::get(+vec_data::get(Lhs.getValue(I)))); - return Ret; - } else { - return vec{+Lhs.m_Data}; - } - } - - // operator -. Not available for std::byte as it is not an arithmetic type. - template - friend std::enable_if_t<(!detail::is_byte_v), vec> - operator-(const vec &Lhs) { - namespace oneapi = sycl::ext::oneapi; - vec Ret{}; - if constexpr (IsBfloat16 && NumElements == 1) { - oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data); - oneapi::bfloat16 w = -v; - Ret.m_Data = oneapi::detail::bfloat16ToBits(w); - } else if constexpr (IsBfloat16) { - for (size_t I = 0; I < NumElements; I++) { - oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data[I]); - oneapi::bfloat16 w = -v; - Ret.m_Data[I] = oneapi::detail::bfloat16ToBits(w); - } - } else if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { - for (size_t I = 0; I < NumElements; ++I) - Ret.setValue( - I, vec_data::get(-vec_data::get(Lhs.getValue(I)))); - return Ret; - } else { - Ret = vec{-Lhs.m_Data}; - if constexpr (std::is_same_v) { - Ret.ConvertToDataT(); - } - return Ret; - } - } - - // OP is: &&, || - // vec operatorOP(const vec &Rhs) const; - // vec operatorOP(const DataT &Rhs) const; - - // OP is: ==, !=, <, >, <=, >= - // vec operatorOP(const vec &Rhs) const; - // vec operatorOP(const DataT &Rhs) const; private: // Generic method that execute "Operation" on underlying values. @@ -1346,6 +1011,7 @@ template class vec { } #endif // __SYCL_USE_EXT_VECTOR_TYPE__ +public: // setValue and getValue should be able to operate on different underlying // types: enum cl_float#N , builtin vector float#N, builtin type float. // These versions are for N > 1. @@ -1415,9 +1081,8 @@ template class vec { DataT getValue(int Index) const { return (NumElements == 1) ? getValue(Index, 0) : getValue(Index, 0.f); } - +private: // fields - // Alignment is the same as size, to a maximum size of 64. // detail::vector_alignment will return that value. alignas(detail::vector_alignment::value) DataType m_Data; @@ -1427,6 +1092,9 @@ template class vec { int... T5> friend class detail::SwizzleOp; template friend class vec; + // To allow arithmetic operators access private members of vec. + template friend class detail::vec_arith; + template friend class detail::vec_arith_common; }; ///////////////////////// class sycl::vec ///////////////////////// From b739c088c15142d46f955b7cc22d80144adc7e67 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Tue, 4 Jun 2024 08:35:09 -0700 Subject: [PATCH 08/18] Fix formatting; Address reviews --- sycl/include/sycl/detail/vector_arith.hpp | 62 ++++++++++++----------- sycl/include/sycl/vector_preview.hpp | 1 + 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index d9dc881be122c..0bde183e624fb 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -46,7 +46,7 @@ using rel_t = typename std::conditional_t< #define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ template \ friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const vec_t & Rhs) { \ + const vec_t & Rhs) { \ vec_t Ret; \ if constexpr (vec_t::IsUsingArrayOnDevice) { \ for (size_t I = 0; I < NumElements; ++I) { \ @@ -65,11 +65,12 @@ using rel_t = typename std::conditional_t< #define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ template \ friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const vec_t & Rhs) { \ + const vec_t & Rhs) { \ vec_t Ret{}; \ for (size_t I = 0; I < NumElements; ++I) \ - Ret.setValue(I, (DataT)(vec_data::get(Lhs.getValue( \ - I)) BINOP vec_data::get(Rhs.getValue(I)))); \ + Ret.setValue(I, \ + (DataT)(vec_data::get(Lhs.getValue(I)) \ + BINOP vec_data::get(Rhs.getValue(I)))); \ return Ret; \ } #endif // __SYCL_DEVICE_ONLY__ @@ -79,17 +80,17 @@ using rel_t = typename std::conditional_t< \ template \ friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \ - const DataT & Rhs) { \ + const DataT & Rhs) { \ return Lhs BINOP vec_t(Rhs); \ } \ template \ friend std::enable_if_t<(COND), vec_t> operator BINOP(const DataT & Lhs, \ - const vec_t & Rhs) { \ + const vec_t & Rhs) { \ return vec_t(Lhs) BINOP Rhs; \ } \ template \ - friend std::enable_if_t<(COND), vec_t> &operator OPASSIGN(vec_t & Lhs, \ - const vec_t & Rhs) { \ + friend std::enable_if_t<(COND), vec_t> &operator OPASSIGN( \ + vec_t & Lhs, const vec_t & Rhs) { \ Lhs = Lhs BINOP Rhs; \ return Lhs; \ } \ @@ -108,7 +109,7 @@ using rel_t = typename std::conditional_t< * \ | / * \ | / * sycl::vec - * + * * vec_arith_common is the base class for vec_arith. It contains * the common math operators of sycl::vec for all types. * vec_arith is the derived class that contains the math operators @@ -119,14 +120,13 @@ template struct vec_helper; template class vec_arith : public vec_arith_common { -public: +protected: using vec_t = vec; using ocl_t = rel_t; template using vec_data = vec_helper; // operator!. - friend vec, NumElements> - operator!(const vec_t &Rhs) { + friend vec, NumElements> operator!(const vec_t &Rhs) { if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { vec_t Ret{}; for (size_t I = 0; I < NumElements; ++I) { @@ -166,7 +166,8 @@ class vec_arith : public vec_arith_common { oneapi::bfloat16 w = -v; Ret.m_Data[I] = oneapi::detail::bfloat16ToBits(w); } - } else if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { + } else if constexpr (vec_t::IsUsingArrayOnDevice || + vec_t::IsUsingArrayOnHost) { for (size_t I = 0; I < NumElements; ++I) Ret.setValue( I, vec_data::get(-vec_data::get(Lhs.getValue(I)))); @@ -185,11 +186,11 @@ class vec_arith : public vec_arith_common { #error "Undefine __SYCL_UOP macro" #endif #define __SYCL_UOP(UOP, OPASSIGN) \ - friend vec_t & operator UOP(vec_t & Rhs) { \ + friend vec_t &operator UOP(vec_t & Rhs) { \ Rhs OPASSIGN vec_data::get(1); \ return Rhs; \ } \ - friend vec_t operator UOP(vec_t & Lhs, int) { \ + friend vec_t operator UOP(vec_t &Lhs, int) { \ vec_t Ret(Lhs); \ Lhs OPASSIGN vec_data::get(1); \ return Ret; \ @@ -287,8 +288,7 @@ class vec_arith : public vec_arith_common { // The following OPs are available only when: DataT != cl_float && // DataT != cl_double && DataT != cl_half && DataT != BF16. - __SYCL_BINOP(%, %=, false, - (!detail::is_vgenfloat_v)) + __SYCL_BINOP(%, %=, false, (!detail::is_vgenfloat_v)) // Bitwise operations are allowed for std::byte. __SYCL_BINOP(|, |=, false, (!detail::is_vgenfloat_v)) __SYCL_BINOP(&, &=, false, (!detail::is_vgenfloat_v)) @@ -302,8 +302,11 @@ class vec_arith : public vec_arith_common { #if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) template -class vec_arith: public vec_arith_common { -public: +class vec_arith + : public vec_arith_common { +protected: + // NumElements can never be zero. Still using the redundant check to avoid + // incomplete type errors. using DataT = typename std::conditional_t; using vec_t = vec; template using vec_data = vec_helper; @@ -315,14 +318,14 @@ class vec_arith: public vec_arith_common // constexpr std::byte operator<<( std::byte b, IntegerType shift ) // noexcept; - friend vec_t operator <<(const vec_t & Lhs, int shift) { + friend vec_t operator<<(const vec_t &Lhs, int shift) { vec_t Ret; for (size_t I = 0; I < NumElements; ++I) { Ret[I] = Lhs[I] << shift; } return Ret; } - friend vec_t & operator <<(vec_t & Lhs, int shift) { + friend vec_t &operator<<=(vec_t &Lhs, int shift) { Lhs = Lhs << shift; return Lhs; } @@ -330,14 +333,14 @@ class vec_arith: public vec_arith_common // constexpr std::byte operator>>( std::byte b, IntegerType shift ) // noexcept; - friend vec_t operator >>(const vec_t & Lhs, int shift) { + friend vec_t operator>>(const vec_t &Lhs, int shift) { vec_t Ret; for (size_t I = 0; I < NumElements; ++I) { Ret[I] = Lhs[I] >> shift; } return Ret; } - friend vec_t & operator >>(vec_t & Lhs, int shift) { + friend vec_t &operator>>=(vec_t &Lhs, int shift) { Lhs = Lhs >> shift; return Lhs; } @@ -346,14 +349,13 @@ class vec_arith: public vec_arith_common friend class vec; + // friends + template friend class vec; }; #endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -template -class vec_arith_common { - public: +template class vec_arith_common { +protected: using vec_t = vec; // operator~() available only when: dataT != float && dataT != double @@ -376,7 +378,7 @@ class vec_arith_common { } } - //friends + // friends template friend class vec; }; @@ -385,4 +387,4 @@ class vec_arith_common { } // namespace detail } // namespace _V1 -} // namespace sycl \ No newline at end of file +} // namespace sycl diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index c46643f68d76d..dbaa9cff7d44a 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -1081,6 +1081,7 @@ class vec : public detail::vec_arith { DataT getValue(int Index) const { return (NumElements == 1) ? getValue(Index, 0) : getValue(Index, 0.f); } + private: // fields // Alignment is the same as size, to a maximum size of 64. From 19aa68d0ed45f1d7c2e94772031368b84881d257 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Thu, 6 Jun 2024 12:22:36 -0700 Subject: [PATCH 09/18] Expose getters and setters to BINOPS via a class in ::detail --- sycl/include/sycl/detail/vector_arith.hpp | 50 ++++++++++++++++------- sycl/include/sycl/vector_preview.hpp | 25 ++++++++++-- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index 0bde183e624fb..67e5a13b1efcd 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -25,6 +25,8 @@ template class vec; namespace detail { +template class VecGetterSetter; + // Element type for relational operator return value. template using rel_t = typename std::conditional_t< @@ -50,7 +52,10 @@ using rel_t = typename std::conditional_t< vec_t Ret; \ if constexpr (vec_t::IsUsingArrayOnDevice) { \ for (size_t I = 0; I < NumElements; ++I) { \ - Ret.setValue(I, (Lhs.getValue(I) BINOP Rhs.getValue(I))); \ + detail::VecGetterSetter::setValue( \ + Ret, I, \ + (detail::VecGetterSetter::getValue(Lhs, I) \ + BINOP detail::VecGetterSetter::getValue(Rhs, I))); \ } \ } else { \ Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ @@ -68,9 +73,12 @@ using rel_t = typename std::conditional_t< const vec_t & Rhs) { \ vec_t Ret{}; \ for (size_t I = 0; I < NumElements; ++I) \ - Ret.setValue(I, \ - (DataT)(vec_data::get(Lhs.getValue(I)) \ - BINOP vec_data::get(Rhs.getValue(I)))); \ + detail::VecGetterSetter::setValue( \ + Ret, I, \ + (DataT)(vec_data::get( \ + detail::VecGetterSetter::getValue(Lhs, I)) \ + BINOP vec_data::get( \ + detail::VecGetterSetter::getValue(Rhs, I)))); \ return Ret; \ } #endif // __SYCL_DEVICE_ONLY__ @@ -130,7 +138,10 @@ class vec_arith : public vec_arith_common { if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { vec_t Ret{}; for (size_t I = 0; I < NumElements; ++I) { - Ret.setValue(I, !vec_data::get(Rhs.getValue(I))); + detail::VecGetterSetter::setValue( + Ret, I, + !vec_data::get( + detail::VecGetterSetter::getValue(Rhs, I))); } return Ret.template as, NumElements>>(); } else { @@ -144,8 +155,10 @@ class vec_arith : public vec_arith_common { if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { vec_t Ret{}; for (size_t I = 0; I < NumElements; ++I) - Ret.setValue( - I, vec_data::get(+vec_data::get(Lhs.getValue(I)))); + detail::VecGetterSetter::setValue( + Ret, I, + vec_data::get(+vec_data::get( + detail::VecGetterSetter::getValue(Lhs, I)))); return Ret; } else { return vec_t{+Lhs.m_Data}; @@ -169,8 +182,10 @@ class vec_arith : public vec_arith_common { } else if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { for (size_t I = 0; I < NumElements; ++I) - Ret.setValue( - I, vec_data::get(-vec_data::get(Lhs.getValue(I)))); + detail::VecGetterSetter::setValue( + Ret, I, + vec_data::get(-vec_data::get( + detail::VecGetterSetter::getValue(Lhs, I)))); return Ret; } else { Ret = vec_t{-Lhs.m_Data}; @@ -222,8 +237,11 @@ class vec_arith : public vec_arith_common { for (size_t I = 0; I < NumElements; ++I) { \ /* We cannot use SetValue here as the operator is not a friend of*/ \ /* Ret on Windows. */ \ - Ret[I] = static_cast(-(vec_data::get( \ - Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ + Ret[I] = static_cast( \ + -(vec_data::get( \ + detail::VecGetterSetter::getValue(Lhs, I)) \ + RELLOGOP vec_data::get( \ + detail::VecGetterSetter::getValue(Rhs, I)))); \ } \ } else { \ Ret = vec( \ @@ -243,8 +261,11 @@ class vec_arith : public vec_arith_common { for (size_t I = 0; I < NumElements; ++I) { \ /* We cannot use SetValue here as the operator is not a friend of*/ \ /* Ret on Windows. */ \ - Ret[I] = static_cast(-(vec_data::get( \ - Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ + Ret[I] = static_cast( \ + -(vec_data::get( \ + detail::VecGetterSetter::getValue(Lhs, I)) \ + RELLOGOP vec_data::get( \ + detail::VecGetterSetter::getValue(Rhs, I)))); \ } \ return Ret; \ } @@ -366,7 +387,8 @@ template class vec_arith_common { if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { vec_t Ret{}; for (size_t I = 0; I < NumElements; ++I) { - Ret.setValue(I, ~Rhs.getValue(I)); + detail::VecGetterSetter::setValue( + Ret, I, ~detail::VecGetterSetter::getValue(Rhs, I)); } return Ret; } else { diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index dbaa9cff7d44a..e159ea2454371 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -39,7 +39,7 @@ #include // for memcpy #include // for is_contained #include // for is_floating_point -#include // for vec_arith_common and vec_arith +#include // for vec_arith_common and vec_arith #include // for convertImpl #include // for vector_alignment #include // for StorageT, half, Vec16... @@ -335,6 +335,26 @@ __SYCL_DEFINE_BF16_VECSTORAGE(4) __SYCL_DEFINE_BF16_VECSTORAGE(8) __SYCL_DEFINE_BF16_VECSTORAGE(16) #undef __SYCL_DEFINE_BF16_VECSTORAGE + +// FIXME: Remove this class after eliminating setValue() and getValue() +// dependencies from math operations on sycl::vec. +// This class is a friend of sycl::vec and exposes getValue/setValue +// that are used by sycl::vec math operations. +template class VecGetterSetter { +public: + template + constexpr static void setValue(VecT &v, int Index, const DataT &Value) { + if (N == 1) + v.setValue(Index, Value, 0); + else + v.setValue(Index, Value, 0.f); + } + + template + static DataT getValue(VecT v, int Index) { + return (N == 1) ? v.getValue(Index, 0) : v.getValue(Index, 0.f); + } +}; } // namespace detail template using vec_data = detail::vec_helper; @@ -1011,7 +1031,6 @@ class vec : public detail::vec_arith { } #endif // __SYCL_USE_EXT_VECTOR_TYPE__ -public: // setValue and getValue should be able to operate on different underlying // types: enum cl_float#N , builtin vector float#N, builtin type float. // These versions are for N > 1. @@ -1082,7 +1101,6 @@ class vec : public detail::vec_arith { return (NumElements == 1) ? getValue(Index, 0) : getValue(Index, 0.f); } -private: // fields // Alignment is the same as size, to a maximum size of 64. // detail::vector_alignment will return that value. @@ -1096,6 +1114,7 @@ class vec : public detail::vec_arith { // To allow arithmetic operators access private members of vec. template friend class detail::vec_arith; template friend class detail::vec_arith_common; + template friend class detail::VecGetterSetter; }; ///////////////////////// class sycl::vec ///////////////////////// From 0d7488674b7f2a7f6b12c203f7b5fa4c4eec90b8 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 7 Jun 2024 07:49:31 -0700 Subject: [PATCH 10/18] Address reviews --- sycl/include/sycl/detail/vector_arith.hpp | 41 +++++++++++------------ sycl/include/sycl/vector_preview.hpp | 6 ++-- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index 67e5a13b1efcd..fb92a77389d7c 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -25,7 +25,7 @@ template class vec; namespace detail { -template class VecGetterSetter; +template class VecAccess; // Element type for relational operator return value. template @@ -52,10 +52,10 @@ using rel_t = typename std::conditional_t< vec_t Ret; \ if constexpr (vec_t::IsUsingArrayOnDevice) { \ for (size_t I = 0; I < NumElements; ++I) { \ - detail::VecGetterSetter::setValue( \ + detail::VecAccess::setValue( \ Ret, I, \ - (detail::VecGetterSetter::getValue(Lhs, I) \ - BINOP detail::VecGetterSetter::getValue(Rhs, I))); \ + (detail::VecAccess::getValue(Lhs, I) \ + BINOP detail::VecAccess::getValue(Rhs, I))); \ } \ } else { \ Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ @@ -73,12 +73,12 @@ using rel_t = typename std::conditional_t< const vec_t & Rhs) { \ vec_t Ret{}; \ for (size_t I = 0; I < NumElements; ++I) \ - detail::VecGetterSetter::setValue( \ + detail::VecAccess::setValue( \ Ret, I, \ (DataT)(vec_data::get( \ - detail::VecGetterSetter::getValue(Lhs, I)) \ + detail::VecAccess::getValue(Lhs, I)) \ BINOP vec_data::get( \ - detail::VecGetterSetter::getValue(Rhs, I)))); \ + detail::VecAccess::getValue(Rhs, I)))); \ return Ret; \ } #endif // __SYCL_DEVICE_ONLY__ @@ -138,10 +138,9 @@ class vec_arith : public vec_arith_common { if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { vec_t Ret{}; for (size_t I = 0; I < NumElements; ++I) { - detail::VecGetterSetter::setValue( + detail::VecAccess::setValue( Ret, I, - !vec_data::get( - detail::VecGetterSetter::getValue(Rhs, I))); + !vec_data::get(detail::VecAccess::getValue(Rhs, I))); } return Ret.template as, NumElements>>(); } else { @@ -155,10 +154,10 @@ class vec_arith : public vec_arith_common { if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { vec_t Ret{}; for (size_t I = 0; I < NumElements; ++I) - detail::VecGetterSetter::setValue( + detail::VecAccess::setValue( Ret, I, vec_data::get(+vec_data::get( - detail::VecGetterSetter::getValue(Lhs, I)))); + detail::VecAccess::getValue(Lhs, I)))); return Ret; } else { return vec_t{+Lhs.m_Data}; @@ -182,10 +181,10 @@ class vec_arith : public vec_arith_common { } else if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { for (size_t I = 0; I < NumElements; ++I) - detail::VecGetterSetter::setValue( + detail::VecAccess::setValue( Ret, I, vec_data::get(-vec_data::get( - detail::VecGetterSetter::getValue(Lhs, I)))); + detail::VecAccess::getValue(Lhs, I)))); return Ret; } else { Ret = vec_t{-Lhs.m_Data}; @@ -238,10 +237,9 @@ class vec_arith : public vec_arith_common { /* We cannot use SetValue here as the operator is not a friend of*/ \ /* Ret on Windows. */ \ Ret[I] = static_cast( \ - -(vec_data::get( \ - detail::VecGetterSetter::getValue(Lhs, I)) \ + -(vec_data::get(detail::VecAccess::getValue(Lhs, I)) \ RELLOGOP vec_data::get( \ - detail::VecGetterSetter::getValue(Rhs, I)))); \ + detail::VecAccess::getValue(Rhs, I)))); \ } \ } else { \ Ret = vec( \ @@ -262,10 +260,9 @@ class vec_arith : public vec_arith_common { /* We cannot use SetValue here as the operator is not a friend of*/ \ /* Ret on Windows. */ \ Ret[I] = static_cast( \ - -(vec_data::get( \ - detail::VecGetterSetter::getValue(Lhs, I)) \ + -(vec_data::get(detail::VecAccess::getValue(Lhs, I)) \ RELLOGOP vec_data::get( \ - detail::VecGetterSetter::getValue(Rhs, I)))); \ + detail::VecAccess::getValue(Rhs, I)))); \ } \ return Ret; \ } @@ -387,8 +384,8 @@ template class vec_arith_common { if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) { vec_t Ret{}; for (size_t I = 0; I < NumElements; ++I) { - detail::VecGetterSetter::setValue( - Ret, I, ~detail::VecGetterSetter::getValue(Rhs, I)); + detail::VecAccess::setValue( + Ret, I, ~detail::VecAccess::getValue(Rhs, I)); } return Ret; } else { diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index e159ea2454371..6be5923d97ba0 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -39,7 +39,7 @@ #include // for memcpy #include // for is_contained #include // for is_floating_point -#include // for vec_arith_common and vec_arith +#include // for vec_arith_common and vec_arith #include // for convertImpl #include // for vector_alignment #include // for StorageT, half, Vec16... @@ -340,7 +340,7 @@ __SYCL_DEFINE_BF16_VECSTORAGE(16) // dependencies from math operations on sycl::vec. // This class is a friend of sycl::vec and exposes getValue/setValue // that are used by sycl::vec math operations. -template class VecGetterSetter { +template class VecAccess { public: template constexpr static void setValue(VecT &v, int Index, const DataT &Value) { @@ -1114,7 +1114,7 @@ class vec : public detail::vec_arith { // To allow arithmetic operators access private members of vec. template friend class detail::vec_arith; template friend class detail::vec_arith_common; - template friend class detail::VecGetterSetter; + template friend class detail::VecAccess; }; ///////////////////////// class sycl::vec ///////////////////////// From 3b34c2cf22c54bcc13baa6d0fa465697a4158772 Mon Sep 17 00:00:00 2001 From: "Agarwal, Udit" Date: Fri, 7 Jun 2024 11:08:53 -0700 Subject: [PATCH 11/18] Minor fixes --- sycl/include/sycl/detail/vector_arith.hpp | 17 ++++- sycl/include/sycl/vector_preview.hpp | 78 +++++++++++++---------- 2 files changed, 57 insertions(+), 38 deletions(-) diff --git a/sycl/include/sycl/detail/vector_arith.hpp b/sycl/include/sycl/detail/vector_arith.hpp index fb92a77389d7c..e08e8f7b9c46b 100644 --- a/sycl/include/sycl/detail/vector_arith.hpp +++ b/sycl/include/sycl/detail/vector_arith.hpp @@ -60,7 +60,7 @@ using rel_t = typename std::conditional_t< } else { \ Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ if constexpr (std::is_same_v && CONVERT) { \ - Ret.ConvertToDataT(); \ + vec_arith_common::ConvertToDataT(Ret); \ } \ } \ return Ret; \ @@ -189,7 +189,7 @@ class vec_arith : public vec_arith_common { } else { Ret = vec_t{-Lhs.m_Data}; if constexpr (std::is_same_v) { - Ret.ConvertToDataT(); + vec_arith_common::ConvertToDataT(Ret); } return Ret; } @@ -391,12 +391,23 @@ template class vec_arith_common { } else { vec_t Ret{(typename vec_t::DataType) ~Rhs.m_Data}; if constexpr (std::is_same_v) { - Ret.ConvertToDataT(); + vec_arith_common::ConvertToDataT(Ret); } return Ret; } } +#ifdef __SYCL_DEVICE_ONLY__ + using vec_bool_t = vec; + // Require only for std::bool. + static void ConvertToDataT(vec_bool_t &Ret) { + for (size_t i = 0; i < NumElements; ++i) { + DataT tmp = detail::VecAccess::getValue(Ret, i); + detail::VecAccess::setValue(Ret, i, tmp); + } + } +#endif + // friends template friend class vec; }; diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index 6be5923d97ba0..05687338928c5 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -26,10 +26,6 @@ #error "SYCL device compiler is built without ext_vector_type support" #endif -#if defined(__SYCL_DEVICE_ONLY__) -#define __SYCL_USE_EXT_VECTOR_TYPE__ -#endif - #include // for decorated, address_space #include // for half, cl_char, cl_int #include // for ArrayCreator, RepeatV... @@ -47,7 +43,7 @@ #include // bfloat16 #include // for array -#include // for assert +#include // for assert #include // for size_t, NULL, byte #include // for uint8_t, int16_t, int... #include // for divides, multiplies @@ -363,18 +359,28 @@ template using vec_data_t = typename detail::vec_helper::RetType; ///////////////////////// class sycl::vec ///////////////////////// -/// Provides a cross-patform vector class template that works efficiently on -/// SYCL devices as well as in host C++ code. -/// -/// \ingroup sycl_api +// Provides a cross-patform vector class template that works efficiently on +// SYCL devices as well as in host C++ code. template class vec : public detail::vec_arith { using DataT = Type; + static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements; + // This represent type of underlying value. There should be only one field // in the class, so vec should be equal to float16 in memory. using DataType = typename detail::VecStorage::DataType; +public: +#ifdef __SYCL_DEVICE_ONLY__ + // Type used for passing sycl::vec to SPIRV builtins. + // We can not use ext_vector_type(1) as it's not supported by SPIRV + // plugins (CTS fails). + using vector_t = + typename detail::VecStorage::VectorDataType; +#endif // __SYCL_DEVICE_ONLY__ + +private: static constexpr bool IsHostHalf = std::is_same_v && std::is_same_v { static constexpr bool IsBfloat16 = std::is_same_v; - static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements; static constexpr size_t Sz = sizeof(DataT) * AdjustedNum; static constexpr bool IsSizeGreaterThanMaxAlign = (Sz > detail::MaxVecAlignment); @@ -456,6 +461,8 @@ class vec : public detail::vec_arith { } template static constexpr auto FlattenVecArgHelper(const T &A) { + // static_cast required to avoid narrowing conversion warning + // when T = unsigned long int and DataT_ = int. return std::array{vec_data::get(static_cast(A))}; } template struct FlattenVecArg { @@ -551,6 +558,7 @@ class vec : public detail::vec_arith { using EnableIfSuitableNumElements = typename std::enable_if_t::value>; + // Implementation detail for the next public ctor. template constexpr vec(const std::array, NumElements> &Arr, std::index_sequence) @@ -562,14 +570,13 @@ class vec : public detail::vec_arith { })(Arr[Is])...} {} public: + // Aliases required by SPEC to make sycl::vec consistent + // with that of marray and buffer. using element_type = DataT; using value_type = DataT; using rel_t = detail::rel_t; -#ifdef __SYCL_DEVICE_ONLY__ - using vector_t = - typename detail::VecStorage::VectorDataType; -#endif // __SYCL_DEVICE_ONLY__ + /****************** Constructors **************/ vec() = default; constexpr vec(const vec &Rhs) = default; @@ -587,7 +594,7 @@ class vec : public detail::vec_arith { return *this; } -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ +#ifdef __SYCL_DEVICE_ONLY__ template using EnableIfNotHostHalf = typename std::enable_if_t; @@ -601,7 +608,7 @@ class vec : public detail::vec_arith { template using EnableIfNotUsingArrayOnDevice = typename std::enable_if_t; -#endif // __SYCL_USE_EXT_VECTOR_TYPE__ +#endif // __SYCL_DEVICE_ONLY__ template using EnableIfUsingArray = @@ -612,7 +619,7 @@ class vec : public detail::vec_arith { typename std::enable_if_t; -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ +#ifdef __SYCL_DEVICE_ONLY__ template explicit constexpr vec(const EnableIfNotUsingArrayOnDevice &arg) @@ -645,12 +652,17 @@ class vec : public detail::vec_arith { } return *this; } -#else // __SYCL_USE_EXT_VECTOR_TYPE__ +#else // __SYCL_DEVICE_ONLY__ explicit constexpr vec(const DataT &arg) : vec{detail::RepeatValue( static_cast>(arg)), std::make_index_sequence()} {} + /****************** Assignment Operators **************/ + + // Template required to prevent ambiguous overload with the copy assignment + // when NumElements == 1. The template prevents implicit conversion from + // vec<_, 1> to DataT. template typename std::enable_if_t< std::is_fundamental_v> || @@ -662,9 +674,9 @@ class vec : public detail::vec_arith { } return *this; } -#endif // __SYCL_USE_EXT_VECTOR_TYPE__ +#endif // __SYCL_DEVICE_ONLY__ -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ +#ifdef __SYCL_DEVICE_ONLY__ // Optimized naive constructors with NumElements of DataT values. // We don't expect compilers to optimize vararg recursive functions well. @@ -713,7 +725,7 @@ class vec : public detail::vec_arith { vec_data::get(ArgA), vec_data::get(ArgB), vec_data::get(ArgC), vec_data::get(ArgD), vec_data::get(ArgE), vec_data::get(ArgF)} {} -#endif // __SYCL_USE_EXT_VECTOR_TYPE__ +#endif // __SYCL_DEVICE_ONLY__ // Constructor from values of base type or vec of base type. Checks that // base types are match and that the NumElements == sum of lengths of args. @@ -736,6 +748,10 @@ class vec : public detail::vec_arith { } } + /* Available only when: compiled for the device. + * Converts this SYCL vec instance to the underlying backend-native vector + * type defined by vector_t. + */ operator vector_t() const { if constexpr (!IsUsingArrayOnDevice) { return m_Data; @@ -986,17 +1002,9 @@ class vec : public detail::vec_arith { store(Offset, MultiPtr); } - void ConvertToDataT() { - for (size_t i = 0; i < NumElements; ++i) { - DataT tmp = getValue(i); - setValue(i, tmp); - } - } - private: // Generic method that execute "Operation" on underlying values. - -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ +#ifdef __SYCL_DEVICE_ONLY__ template