Skip to content

Commit

Permalink
[SYCL][COMPAT] nd_range barriers seq_cst by default in supported devi…
Browse files Browse the repository at this point in the history
…ces (#12974)

AMD ~~and CUDA~~ devices still not supported.
~~CUDA to be supported in #12516

Edit: Since #12516 has been merged, CUDA is also `seq_cst` by default.
  • Loading branch information
Alcpz authored Apr 30, 2024
1 parent cfd0d41 commit 9d1cbc5
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 60 deletions.
11 changes: 9 additions & 2 deletions sycl/doc/syclcompat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1184,17 +1184,24 @@ spec, and so should be used with caution.
namespace syclcompat {
namespace experimental {
#if defined(__AMDGPU__)
// seq_cst currently not working for AMD
constexpr sycl::memory_order barrier_memory_order = sycl::memory_order::acq_rel;
#else
constexpr sycl::memory_order barrier_memory_order = sycl::memory_order::seq_cst;
#endif
template <int dimensions = 3>
inline void nd_range_barrier(
sycl::nd_item<dimensions> item,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::atomic_ref<unsigned int, barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &counter);
template <>
inline void nd_range_barrier(
sycl::nd_item<1> item,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::atomic_ref<unsigned int, barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &counter);
Expand Down
17 changes: 13 additions & 4 deletions sycl/include/syclcompat/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,15 @@ inline int get_sycl_language_version() {
}

namespace experimental {

// FIXME(@intel/syclcompat-lib-reviewers): unify once supported in the AMD
// backend.
#if defined(__AMDGPU__)
constexpr sycl::memory_order barrier_memory_order = sycl::memory_order::acq_rel;
#else
constexpr sycl::memory_order barrier_memory_order = sycl::memory_order::seq_cst;
#endif

/// Synchronize work items from all work groups within a SYCL kernel.
/// \param [in] item: Represents a work group.
/// \param [in] counter: An atomic object defined on a device memory which can
Expand All @@ -502,8 +511,8 @@ namespace experimental {
/// a SYCL kernel can be scheduled actively at the same time on a device.
template <int dimensions = 3>
inline void nd_range_barrier(
sycl::nd_item<dimensions> item,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
const sycl::nd_item<dimensions> &item,
sycl::atomic_ref<unsigned int, barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &counter) {

Expand Down Expand Up @@ -542,8 +551,8 @@ inline void nd_range_barrier(
/// a SYCL kernel can be scheduled actively at the same time on a device.
template <>
inline void nd_range_barrier(
sycl::nd_item<1> item,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
const sycl::nd_item<1> &item,
sycl::atomic_ref<unsigned int, barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &counter) {
unsigned int num_groups = item.get_group_range(0);
Expand Down
108 changes: 54 additions & 54 deletions sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,19 @@
#include <sycl/detail/core.hpp>
#include <syclcompat.hpp>

void kernel_1(
sycl::nd_item<3> item_ct1,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
void kernel_1(sycl::nd_item<3> item_ct1,
sycl::atomic_ref<
unsigned int, syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);
}

void kernel_2(
sycl::nd_item<3> item_ct1,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
void kernel_2(sycl::nd_item<3> item_ct1,
sycl::atomic_ref<
unsigned int, syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);

syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);
Expand All @@ -70,18 +70,18 @@ void test_nd_range_barrier_dim3() {

q_ct1
->submit([&](sycl::handler &cgh) {
cgh.parallel_for(
sycl::nd_range<3>(sycl::range<3>(1, 1, 4) *
sycl::range<3>(1, 1, 4),
sycl::range<3>(1, 1, 4)),
[=](sycl::nd_item<3> item_ct1) {
auto atm_sync_ct1 =
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
kernel_1(item_ct1, atm_sync_ct1);
});
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, 4) *
sycl::range<3>(1, 1, 4),
sycl::range<3>(1, 1, 4)),
[=](sycl::nd_item<3> item_ct1) {
auto atm_sync_ct1 = sycl::atomic_ref<
unsigned int,
syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
kernel_1(item_ct1, atm_sync_ct1);
});
})
.wait();
}
Expand All @@ -95,37 +95,37 @@ void test_nd_range_barrier_dim3() {

q_ct1
->submit([&](sycl::handler &cgh) {
cgh.parallel_for(
sycl::nd_range<3>(sycl::range<3>(1, 1, 4) *
sycl::range<3>(1, 1, 4),
sycl::range<3>(1, 1, 4)),
[=](sycl::nd_item<3> item_ct1) {
auto atm_sync_ct1 =
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
kernel_2(item_ct1, atm_sync_ct1);
});
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, 4) *
sycl::range<3>(1, 1, 4),
sycl::range<3>(1, 1, 4)),
[=](sycl::nd_item<3> item_ct1) {
auto atm_sync_ct1 = sycl::atomic_ref<
unsigned int,
syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
kernel_2(item_ct1, atm_sync_ct1);
});
})
.wait();
}
dev_ct1.queues_wait_and_throw();
}

void kernel_1(
sycl::nd_item<1> item_ct1,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
void kernel_1(sycl::nd_item<1> item_ct1,
sycl::atomic_ref<
unsigned int, syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);
}

void kernel_2(
sycl::nd_item<1> item_ct1,
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
void kernel_2(sycl::nd_item<1> item_ct1,
sycl::atomic_ref<
unsigned int, syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space> &sync_ct1) {
syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);

syclcompat::experimental::nd_range_barrier(item_ct1, sync_ct1);
Expand All @@ -148,11 +148,11 @@ void test_nd_range_barrier_dim1() {
sycl::nd_range<1>(sycl::range<1>(4) * sycl::range<1>(4),
sycl::range<1>(4)),
[=](sycl::nd_item<1> item_ct1) {
auto atm_sync_ct1 =
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
auto atm_sync_ct1 = sycl::atomic_ref<
unsigned int,
syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(sync_ct1[0]);
kernel_1(item_ct1, atm_sync_ct1);
});
})
Expand All @@ -171,11 +171,11 @@ void test_nd_range_barrier_dim1() {
sycl::nd_range<1>(sycl::range<1>(4) * sycl::range<1>(4),
sycl::range<1>(4)),
[=](sycl::nd_item<1> item_ct1) {
auto atm_sync_ct1 =
sycl::atomic_ref<unsigned int, sycl::memory_order::acq_rel,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(
sync_ct1[0]);
auto atm_sync_ct1 = sycl::atomic_ref<
unsigned int,
syclcompat::experimental::barrier_memory_order,
sycl::memory_scope::device,
sycl::access::address_space::global_space>(sync_ct1[0]);
kernel_2(item_ct1, atm_sync_ct1);
});
})
Expand Down

0 comments on commit 9d1cbc5

Please sign in to comment.