Skip to content

Commit

Permalink
[SYCL][COMPAT] nd_range experimental barriers seq_cst by default
Browse files Browse the repository at this point in the history
on devices with support

Not supported in AMDGPU nor NVPTX for now
  • Loading branch information
Alcpz committed Apr 9, 2024
1 parent a0c3b32 commit 59beb7e
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 58 deletions.
17 changes: 13 additions & 4 deletions sycl/include/syclcompat/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,15 @@ inline int get_sycl_language_version() {
}

namespace experimental {

#if defined(__AMDGPU__) || defined(__NVPTX__)
// FIXME: https://github.com/intel/llvm/pull/12516 adds seq_cst support for the
// CUDA backend.
constexpr sycl::memory_order barrier_memory_order = sycl::memory_order::acq_rel;
#else
constexpr sycl::memory_order barrier_memory_order = sycl::memory_order::seq_cst;
#endif

/// Synchronize work items from all work groups within a SYCL kernel.
/// \param [in] item: Represents a work group.
/// \param [in] counter: An atomic object defined on a device memory which can
Expand All @@ -289,8 +298,8 @@ namespace experimental {
/// a SYCL kernel can be scheduled actively at the same time on a device.
template <int dimensions = 3>
inline void nd_range_barrier(
sycl::nd_item<dimensions> item,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
const sycl::nd_item<dimensions> &item,
sycl::atomic_ref<unsigned int, barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &counter) {

Expand Down Expand Up @@ -329,8 +338,8 @@ inline void nd_range_barrier(
/// a SYCL kernel can be scheduled actively at the same time on a device.
template <>
inline void nd_range_barrier(
sycl::nd_item<1> item,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
const sycl::nd_item<1> &item,
sycl::atomic_ref<unsigned int, barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &counter) {
unsigned int num_groups = item.get_group_range(0);
Expand Down
108 changes: 54 additions & 54 deletions sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,19 @@
#include <sycl/detail/core.hpp>
#include <syclcompat.hpp>

void kernel_1(
sycl::nd_item<3> item_ct1,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
void kernel_1(sycl::nd_item<3> item_ct1,
sycl::atomic_ref<
unsigned int, syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);
}

void kernel_2(
sycl::nd_item<3> item_ct1,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
void kernel_2(sycl::nd_item<3> item_ct1,
sycl::atomic_ref<
unsigned int, syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);

syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);
Expand All @@ -70,18 +70,18 @@ void test_nd_range_barrier_dim3() {

q_ct1
->submit([&](sycl::handler &cgh) {
cgh.parallel_for(
sycl::nd_range<3>(sycl::range<3>(1, 1, 4) *
sycl::range<3>(1, 1, 4),
sycl::range<3>(1, 1, 4)),
[=](sycl::nd_item<3> item_ct1) {
auto atm_sync_ct1 =
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
kernel_1(item_ct1, atm_sync_ct1);
});
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, 4) *
sycl::range<3>(1, 1, 4),
sycl::range<3>(1, 1, 4)),
[=](sycl::nd_item<3> item_ct1) {
auto atm_sync_ct1 = sycl::atomic_ref<
unsigned int,
syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
kernel_1(item_ct1, atm_sync_ct1);
});
})
.wait();
}
Expand All @@ -95,37 +95,37 @@ void test_nd_range_barrier_dim3() {

q_ct1
->submit([&](sycl::handler &cgh) {
cgh.parallel_for(
sycl::nd_range<3>(sycl::range<3>(1, 1, 4) *
sycl::range<3>(1, 1, 4),
sycl::range<3>(1, 1, 4)),
[=](sycl::nd_item<3> item_ct1) {
auto atm_sync_ct1 =
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
kernel_2(item_ct1, atm_sync_ct1);
});
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, 4) *
sycl::range<3>(1, 1, 4),
sycl::range<3>(1, 1, 4)),
[=](sycl::nd_item<3> item_ct1) {
auto atm_sync_ct1 = sycl::atomic_ref<
unsigned int,
syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
kernel_2(item_ct1, atm_sync_ct1);
});
})
.wait();
}
dev_ct1.queues_wait_and_throw();
}

void kernel_1(
sycl::nd_item<1> item_ct1,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
void kernel_1(sycl::nd_item<1> item_ct1,
sycl::atomic_ref<
unsigned int, syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);
}

void kernel_2(
sycl::nd_item<1> item_ct1,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
void kernel_2(sycl::nd_item<1> item_ct1,
sycl::atomic_ref<
unsigned int, syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);

syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);
Expand All @@ -148,11 +148,11 @@ void test_nd_range_barrier_dim1() {
sycl::nd_range<1>(sycl::range<1>(4) * sycl::range<1>(4),
sycl::range<1>(4)),
[=](sycl::nd_item<1> item_ct1) {
auto atm_sync_ct1 =
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
auto atm_sync_ct1 = sycl::atomic_ref<
unsigned int,
syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(sync_ct1[0]);
kernel_1(item_ct1, atm_sync_ct1);
});
})
Expand All @@ -171,11 +171,11 @@ void test_nd_range_barrier_dim1() {
sycl::nd_range<1>(sycl::range<1>(4) * sycl::range<1>(4),
sycl::range<1>(4)),
[=](sycl::nd_item<1> item_ct1) {
auto atm_sync_ct1 =
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
auto atm_sync_ct1 = sycl::atomic_ref<
unsigned int,
syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(sync_ct1[0]);
kernel_2(item_ct1, atm_sync_ct1);
});
})
Expand Down

0 comments on commit 59beb7e

Please sign in to comment.