From 56d6e0334d3e8a73bad89679353012511be7a67c Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 22 Apr 2024 12:17:29 +0100 Subject: [PATCH 1/3] [SYCL][E2E] Fix some tests in multi-device mode The atomic tests can be run by multiple SYCL devices, in which case the compiler needs to be told which device the `--cuda-gpu-arch` parameter applies to. The changes to the bfloat16 tests try to restore earlier behaviour, where the `aspect-ext_oneapi_bfloat16_math_functions` feature was preventing other devices from running. This patch also removes the RUN lines from a shared header file (which we don't consider a valid test file), since they may mislead people into thinking the header is itself a runnable test. --- sycl/test-e2e/BFloat16/bfloat16_builtins.hpp | 15 --------------- .../BFloat16/bfloat16_builtins_cuda_generic.cpp | 1 + sycl/test-e2e/syclcompat/atomic/atomic_class.cpp | 2 +- .../syclcompat/atomic/atomic_memory_acq_rel.cpp | 2 +- 4 files changed, 3 insertions(+), 17 deletions(-) diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins.hpp b/sycl/test-e2e/BFloat16/bfloat16_builtins.hpp index df0716a4c3b5a..0452129d439c9 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins.hpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins.hpp @@ -4,21 +4,6 @@ // + sm_80 and above uses some native bfloat16 math instructions // + below sm_80 always uses generic impls -// DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// REQUIRES: aspect-ext_oneapi_bfloat16_math_functions -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} -// RUN: %{run} %t.out - -// Test "new" (ABI breaking) for all platforms ( sm_80/native if CUDA ) -// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_80 %} %s -o %t2.out %{mathflags} %} -// RUN: %if 
preview-breaking-changes-supported %{ %{run} %t2.out %} - -// If CUDA, test "new" again for sm_75/generic -// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple} -Xsycl-target-backend --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} -// RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %} - -// Currently the feature isn't supported on FPGA. -// UNSUPPORTED: accelerator #include #include diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp index 3e2b6b1aae433..ced9093b6b075 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp @@ -5,6 +5,7 @@ // + below sm_80 always uses generic impls // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} +// REQUIRES: aspect-ext_oneapi_bfloat16_math_functions // If CUDA, test "new" again for sm_75/generic // RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple} -Xsycl-target-backend --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp index d004a37bcc610..4e574a556887c 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp @@ -32,7 +32,7 @@ // UNSUPPORTED: hip || (windows && level_zero) -// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out +// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} %s -o %t.out // RUN: %{run} %t.out #include 
diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp index 2acaa85022f0b..047d490140760 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp @@ -32,7 +32,7 @@ // UNSUPPORTED: hip -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %} %s -o %t.out // RUN: %{run} %t.out #include From 7ce174aa86ebeaa49dbccbdf7e1b0c6cc0fdcab3 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 25 Apr 2024 15:32:48 +0100 Subject: [PATCH 2/3] feedback: remove aspect; unsupported cpu --- sycl/test-e2e/BFloat16/bfloat16_builtins.cpp | 4 ++-- sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp index 72eda9978f434..6a4010aed17f5 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp @@ -5,7 +5,6 @@ // + below sm_80 always uses generic impls // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// REQUIRES: aspect-ext_oneapi_bfloat16_math_functions // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_80 %} %s -o %t.out %{mathflags} // RUN: %{run} %t.out @@ -14,7 +13,8 @@ // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} // Currently the feature isn't supported on FPGA. -// UNSUPPORTED: accelerator +// FIXME: enable opaque pointers support on CPU. 
+// UNSUPPORTED: accelerator, cpu #include "bfloat16_builtins.hpp" int main() { diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp index ced9093b6b075..844c5b3c69684 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp @@ -5,14 +5,14 @@ // + below sm_80 always uses generic impls // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// REQUIRES: aspect-ext_oneapi_bfloat16_math_functions // If CUDA, test "new" again for sm_75/generic // RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -fsycl-targets=%{sycl_triple} -Xsycl-target-backend --cuda-gpu-arch=sm_75 %s -o %t3.out %{mathflags} %} %} // RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %} // Currently the feature isn't supported on FPGA. -// UNSUPPORTED: accelerator +// FIXME: enable opaque pointers support on CPU. +// UNSUPPORTED: accelerator, cpu #include "bfloat16_builtins.hpp" int main() { From 4dd6916f4c78d3fd25d7975f73278f0a2a5fc6ed Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 10 Jul 2024 18:39:08 +0100 Subject: [PATCH 3/3] update: xfail another test --- sycl/test-e2e/BFloat16/bfloat16_builtins.cpp | 3 ++- sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp | 3 ++- sycl/test-e2e/BFloat16/bfloat16_type.cpp | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp index 6a4010aed17f5..b96882aa86851 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins.cpp @@ -13,8 +13,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} // Currently the feature isn't supported on FPGA. 
+// UNSUPPORTED: accelerator // FIXME: enable opaque pointers support on CPU. -// UNSUPPORTED: accelerator, cpu +// XFAIL: cpu #include "bfloat16_builtins.hpp" int main() { diff --git a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp index 844c5b3c69684..06621c8d001ba 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_builtins_cuda_generic.cpp @@ -11,8 +11,9 @@ // RUN: %if any-device-is-cuda %{ %if preview-breaking-changes-supported %{ %{run} %t3.out %} %} // Currently the feature isn't supported on FPGA. +// UNSUPPORTED: accelerator // FIXME: enable opaque pointers support on CPU. -// UNSUPPORTED: accelerator, cpu +// XFAIL: cpu #include "bfloat16_builtins.hpp" int main() { diff --git a/sycl/test-e2e/BFloat16/bfloat16_type.cpp b/sycl/test-e2e/BFloat16/bfloat16_type.cpp index 73936eb5dc4e7..0aef086807f73 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_type.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_type.cpp @@ -7,6 +7,7 @@ // UNSUPPORTED: accelerator // FIXME: enable opaque pointers support on CPU. +// XFAIL: cpu //==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==// //