diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index 7cdae68724788..758fb7bda27b6 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -210,6 +210,64 @@ struct sub_group { #endif } + template + using EnableIfIsScalarArithmetic = + std::enable_if_t::value, T>; + + /* --- one-input shuffles --- */ + /* indices in [0 , sub_group size) */ + template + __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") + T shuffle(T x, id_type local_id) const { +#ifdef __SYCL_DEVICE_ONLY__ + return sycl::detail::spirv::Shuffle(*this, x, local_id); +#else + (void)x; + (void)local_id; + throw sycl::exception(make_error_code(errc::feature_not_supported), + "Sub-groups are not supported on host."); +#endif + } + + template + __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") + T shuffle_down(T x, uint32_t delta) const { +#ifdef __SYCL_DEVICE_ONLY__ + return sycl::detail::spirv::ShuffleDown(*this, x, delta); +#else + (void)x; + (void)delta; + throw sycl::exception(make_error_code(errc::feature_not_supported), + "Sub-groups are not supported on host."); +#endif + } + + template + __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") + T shuffle_up(T x, uint32_t delta) const { +#ifdef __SYCL_DEVICE_ONLY__ + return sycl::detail::spirv::ShuffleUp(*this, x, delta); +#else + (void)x; + (void)delta; + throw sycl::exception(make_error_code(errc::feature_not_supported), + "Sub-groups are not supported on host."); +#endif + } + + template + __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") + T shuffle_xor(T x, id_type value) const { +#ifdef __SYCL_DEVICE_ONLY__ + return sycl::detail::spirv::ShuffleXor(*this, x, value); +#else + (void)x; + (void)value; + throw sycl::exception(make_error_code(errc::feature_not_supported), + "Sub-groups are not supported on host."); +#endif + } + /* --- sub_group load/stores --- */ /* these can map to SIMD or block read/write hardware where available */ #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/test-e2e/SubGroup/generic-shuffle.cpp b/sycl/test-e2e/SubGroup/generic-shuffle.cpp new file mode 100644 index 0000000000000..d799a7a64de5e --- /dev/null +++ b/sycl/test-e2e/SubGroup/generic-shuffle.cpp @@ -0,0 +1,240 @@ +// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out +// RUN: %{run} %t.out +// +//==-- generic_shuffle.cpp - SYCL sub_group generic shuffle test *- C++ -*--==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "helper.hpp" +#include +#include +#include +#include +template class pointer_kernel; + +using namespace sycl; + +template +void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { + try { + nd_range<1> NdRange(G, L); + buffer buf(G); + buffer buf_up(G); + buffer buf_down(G); + buffer buf_xor(G); + buffer sgsizebuf(1); + Queue.submit([&](handler &cgh) { + auto acc = buf.template get_access(cgh); + auto acc_up = buf_up.template get_access(cgh); + auto acc_down = + buf_down.template get_access(cgh); + auto acc_xor = buf_xor.template get_access(cgh); + auto sgsizeacc = sgsizebuf.get_access(cgh); + + cgh.parallel_for( + NdRange, [=](nd_item<1> NdItem) { + sycl::sub_group SG = NdItem.get_sub_group(); + uint32_t wggid = NdItem.get_global_id(0); + uint32_t sgid = SG.get_group_id().get(0); + if (wggid == 0) + sgsizeacc[0] = SG.get_max_local_range()[0]; + + T *ptr = static_cast(0x0) + wggid; + + /*GID of middle element in every subgroup*/ + acc[NdItem.get_global_id()] = + SG.shuffle(ptr, SG.get_max_local_range()[0] / 2); + + /* Save GID-SGID */ + acc_up[NdItem.get_global_id()] = SG.shuffle_up(ptr, sgid); + + /* Save GID+SGID */ + acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid); + + /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ + acc_xor[NdItem.get_global_id()] = + SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]); + }); + }); + host_accessor acc(buf); + host_accessor acc_up(buf_up); + host_accessor acc_down(buf_down); + host_accessor acc_xor(buf_xor); + host_accessor sgsizeacc(sgsizebuf); + + size_t sg_size = sgsizeacc[0]; + int SGid = 0; + int SGLid = 0; + int SGBeginGid = 0; + for (int j = 0; j < G; j++) { + if (j % L % sg_size == 0) { + SGid++; + SGLid = 0; + SGBeginGid = j; + } + if (j % L == 0) { + SGid = 0; + SGLid = 0; + SGBeginGid = j; + } + + /*GID of middle element in every subgroup*/ + exit_if_not_equal(acc[j], + static_cast(0x0) + + (j / L * L + SGid * sg_size + sg_size / 2), + "shuffle"); + + /* Value GID+SGID for all element except last SGID in SG*/ + if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) { + exit_if_not_equal(acc_down[j], static_cast(0x0) + (j + SGid), + "shuffle_down"); + } + + /* Value GID-SGID for all element except first SGID in SG*/ + if (j % L % sg_size >= SGid) { + exit_if_not_equal(acc_up[j], static_cast(0x0) + (j - SGid), + "shuffle_up"); + } + + /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ + exit_if_not_equal(acc_xor[j], + static_cast(0x0) + + (SGBeginGid + (SGLid ^ (SGid % sg_size))), + "shuffle_xor"); + SGLid++; + } + } catch (exception e) { + std::cout << "SYCL exception caught: " << e.what(); + exit(1); + } +} + +template +void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) { + + // Fill a vector with values that will be shuffled + std::vector values(G); + std::generate(values.begin(), values.end(), Gen); + + try { + nd_range<1> NdRange(G, L); + buffer buf(G); + buffer buf_up(G); + buffer buf_down(G); + buffer buf_xor(G); + buffer sgsizebuf(1); + buffer buf_in(values.data(), values.size()); + Queue.submit([&](handler &cgh) { + auto acc = buf.template get_access(cgh); + auto acc_up = buf_up.template get_access(cgh); + auto acc_down = + buf_down.template get_access(cgh); + auto acc_xor = buf_xor.template get_access(cgh); + auto sgsizeacc = sgsizebuf.get_access(cgh); + auto in = buf_in.template get_access(cgh); + + cgh.parallel_for( + NdRange, [=](nd_item<1> NdItem) { + sycl::sub_group SG = NdItem.get_sub_group(); + uint32_t wggid = NdItem.get_global_id(0); + uint32_t sgid = SG.get_group_id().get(0); + if (wggid == 0) + sgsizeacc[0] = SG.get_max_local_range()[0]; + + T val = in[wggid]; + + /*GID of middle element in every subgroup*/ + acc[NdItem.get_global_id()] = + SG.shuffle(val, SG.get_max_local_range()[0] / 2); + + /* Save GID-SGID */ + acc_up[NdItem.get_global_id()] = SG.shuffle_up(val, sgid); + + /* Save GID+SGID */ + acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid); + + /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ + acc_xor[NdItem.get_global_id()] = + SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]); + }); + }); + host_accessor acc(buf); + host_accessor acc_up(buf_up); + host_accessor acc_down(buf_down); + host_accessor acc_xor(buf_xor); + host_accessor sgsizeacc(sgsizebuf); + + size_t sg_size = sgsizeacc[0]; + int SGid = 0; + int SGLid = 0; + int SGBeginGid = 0; + for (int j = 0; j < G; j++) { + if (j % L % sg_size == 0) { + SGid++; + SGLid = 0; + SGBeginGid = j; + } + if (j % L == 0) { + SGid = 0; + SGLid = 0; + SGBeginGid = j; + } + + /*GID of middle element in every subgroup*/ + exit_if_not_equal( + acc[j], values[j / L * L + SGid * sg_size + sg_size / 2], "shuffle"); + + /* Value GID+SGID for all element except last SGID in SG*/ + if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) { + exit_if_not_equal(acc_down[j], values[j + SGid], "shuffle_down"); + } + + /* Value GID-SGID for all element except first SGID in SG*/ + if (j % L % sg_size >= SGid) { + exit_if_not_equal(acc_up[j], values[j - SGid], "shuffle_up"); + } + + /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ + exit_if_not_equal(acc_xor[j], + values[SGBeginGid + (SGLid ^ (SGid % sg_size))], + "shuffle_xor"); + SGLid++; + } + } catch (exception e) { + std::cout << "SYCL exception caught: " << e.what(); + exit(1); + } +} + +int main() { + queue Queue; + + // Test shuffle of pointer types + check_pointer(Queue); + + // Test shuffle of non-native types + auto ComplexFloatGenerator = [state = std::complex(0, 1)]() mutable { + return state += std::complex(2, 2); + }; + check_struct>( + Queue, ComplexFloatGenerator); + + if (Queue.get_device().has(sycl::aspect::fp64)) { + auto ComplexDoubleGenerator = [state = + std::complex(0, 1)]() mutable { + return state += std::complex(2, 2); + }; + check_struct>( + Queue, ComplexDoubleGenerator); + } else { + std::cout << "fp64 tests were skipped due to the device not supporting the " + "aspect."; + } + + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/sycl/test-e2e/SubGroup/shuffle.cpp b/sycl/test-e2e/SubGroup/shuffle.cpp new file mode 100644 index 0000000000000..2baf624ce804c --- /dev/null +++ b/sycl/test-e2e/SubGroup/shuffle.cpp @@ -0,0 +1,54 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +//==------------ shuffle.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "shuffle.hpp" +#include + +int main() { + queue Queue; + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + + // Check long long and unsigned long long because they differ from + // long and unsigned long according to C++ rules even if they have the same + // size at some system. + check(Queue); + check(Queue); + check(Queue); + check(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/sycl/test-e2e/SubGroup/shuffle_fp16.cpp b/sycl/test-e2e/SubGroup/shuffle_fp16.cpp new file mode 100644 index 0000000000000..b2fea85d0126f --- /dev/null +++ b/sycl/test-e2e/SubGroup/shuffle_fp16.cpp @@ -0,0 +1,26 @@ +// REQUIRES: aspect-fp16 +// REQUIRES: gpu + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "shuffle.hpp" +#include + +int main() { + queue Queue; + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/sycl/test-e2e/SubGroup/shuffle_fp64.cpp b/sycl/test-e2e/SubGroup/shuffle_fp64.cpp new file mode 100644 index 0000000000000..1440de1d25a8a --- /dev/null +++ b/sycl/test-e2e/SubGroup/shuffle_fp64.cpp @@ -0,0 +1,25 @@ +// REQUIRES: aspect-fp64 +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// +//==------- shuffle_fp64.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "shuffle.hpp" +#include + +int main() { + queue Queue; + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + std::cout << "Test passed." << std::endl; + return 0; +}