From cf829e065ebfeeaa87d15cb907ee57f8568af7d8 Mon Sep 17 00:00:00 2001
From: JackAKirk <jack.kirk@codeplay.com>
Date: Fri, 2 Feb 2024 12:15:25 +0000
Subject: [PATCH] [syclcompat][CUDA] FIX UB in test / seq_cst requires sm_70 on
 CUDA (#12575)

Fix UB in the test by using a single-thread task.

A shared USM variable was being written to simultaneously by multiple
threads without using atomics. AFAIK this is generally not a
well-defined program, and it was leading to invalid values.

seq_cst also requires sm_70 on CUDA, so the compilation invocation is
updated to reflect this.
The CI device is >= sm_70, so it can use seq_cst. However, this test
was not being compiled for >= sm_70.

Signed-off-by: JackAKirk <jack.kirk@codeplay.com>
---
 sycl/test-e2e/syclcompat/atomic/atomic_class.cpp          | 6 +++---
 sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp
index 3c6b95c1eb4af..d004a37bcc610 100644
--- a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp
+++ b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp
@@ -32,7 +32,7 @@
 
 // UNSUPPORTED: hip || (windows && level_zero)
 
-// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
+// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out
 // RUN: %{run} %t.out
 
 #include <sycl/sycl.hpp>
@@ -41,8 +41,8 @@
 #include "../common.hpp"
 #include "atomic_fixt.hpp"
 
-constexpr size_t numBlocks = 64;
-constexpr size_t numThreads = 256;
+constexpr size_t numBlocks = 1;
+constexpr size_t numThreads = 1;
 constexpr size_t numData = 6;
 
 template <typename T, typename AtomicType>
diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp
index 6b31bcc626ee0..4ccc67fbff53e 100644
--- a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp
+++ b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp
@@ -32,7 +32,7 @@
 
 // UNSUPPORTED: hip
 
-// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out
 // RUN: %{run} %t.out
 
 #include <iostream>