diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index e3c6c2d2ee34f..6c728f316fc89 100644 --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -60,6 +60,61 @@ Compilation::~Compilation() { delete Arg.second; } +static void HandleXarchArgs(DerivedArgList *OffloadArgList, const Driver &D, + bool IsDevice) { + if (!OffloadArgList) + return; + + if (IsDevice && !OffloadArgList->hasArg(options::OPT_Xarch_device)) + return; + + if (!IsDevice && !OffloadArgList->hasArg(options::OPT_Xarch_host)) + return; + + bool NeedHandle = false; + std::vector XarchValues; + XarchValues = IsDevice + ? OffloadArgList->getAllArgValues(options::OPT_Xarch_device) + : OffloadArgList->getAllArgValues(options::OPT_Xarch_host); + SmallVector XarchValueRefs; + for (auto XarchV : XarchValues) { + if (XarchV.find(' ') != std::string::npos) { + NeedHandle = true; + StringRef XarchVRef(XarchV); + SmallVector XarchVecs; + XarchVRef.trim().split(XarchVecs, ' ', -1, false); + size_t Index; + const size_t XSize = XarchVecs.size(); + for (Index = 0; Index < XSize; ++Index) { + if (XarchVecs[Index].compare("-mllvm") == 0) { + if (Index < (XSize - 1)) { + XarchValueRefs.push_back(OffloadArgList->MakeArgStringRef( + (StringRef("-mllvm=") + XarchVecs[Index + 1]).str())); + Index++; + continue; + } else + D.Diag(clang::diag::err_drv_missing_argument) << "-mllvm" << 1; + } else + XarchValueRefs.push_back( + OffloadArgList->MakeArgStringRef(XarchVecs[Index])); + } + } else + XarchValueRefs.push_back(StringRef(XarchV)); + } + + if (NeedHandle) { + auto Xarch_OPT = + IsDevice ? options::OPT_Xarch_device : options::OPT_Xarch_host; + OffloadArgList->eraseArg(Xarch_OPT); + for (auto XarchV : XarchValueRefs) { + Arg *A = OffloadArgList->MakeSeparateArg( + nullptr, D.getOpts().getOption(Xarch_OPT), XarchV); + A->claim(); + OffloadArgList->append(A); + } + } +} + const DerivedArgList & Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) { @@ -82,9 +137,11 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, DerivedArgList *NewDAL = nullptr; if (!OffloadArgs) { + HandleXarchArgs(TranslatedArgs, getDriver(), false); NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind, &AllocatedArgs); } else { + HandleXarchArgs(OffloadArgs, getDriver(), true); NewDAL = TC->TranslateXarchArgs(*OffloadArgs, BoundArch, DeviceOffloadKind, &AllocatedArgs); if (!NewDAL) diff --git a/clang/test/Driver/sycl-xarch.cpp b/clang/test/Driver/sycl-xarch.cpp new file mode 100644 index 0000000000000..200a8de643170 --- /dev/null +++ b/clang/test/Driver/sycl-xarch.cpp @@ -0,0 +1,162 @@ +/// +/// Perform several driver tests for SYCL -Xarch_device/host on Linux +/// + +// UNSUPPORTED: system-windows + +/// ########################################################################### + +/// test behavior of -Xarch_device with 1 option for SYCL compiler, the flag +/// should be passed to device compilation only. +// RUN: %clangxx -fsycl %s -Xarch_device -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTION +// RUN: %clangxx -fsycl %s -Xarch_device -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_ONLY +// SYCL_XARCH_DEVICE_OPTION: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_DEVICE_OPTION-SAME: -fsanitize=address +// SYCL_XARCH_DEVICE_OPTION-SAME: -fsanitize-address-use-after-return=never +// SYCL_XARCH_DEVICE_OPTION-SAME: -fno-sanitize-address-use-after-scope +// SYCL_XARCH_DEVICE_OPTION-SAME: "-mllvm" "-asan-instrumentation-with-call-threshold=0" +// SYCL_XARCH_DEVICE_OPTION-SAME: "-mllvm" "-asan-stack=0" +// SYCL_XARCH_DEVICE_OPTION-SAME: "-mllvm" "-asan-globals=0" +// SYCL_XARCH_DEVICE_ONLY: llc{{.*}} "-filetype=obj" +// SYCL_XARCH_DEVICE_ONLY-NOT: fsanitize=address + +/// test behavior of -Xarch_device with multiple options for SYCL compiler, the +/// flags should be passed to device compilation only. +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -DXARCH_DEVICE_TEST -mllvm -enable-merge-functions" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTIONS1 +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -DXARCH_DEVICE_TEST -mllvm -enable-merge-functions" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTIONS1 +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -DXARCH_DEVICE_TEST -mllvm -enable-merge-functions" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTIONS2 +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -DXARCH_DEVICE_TEST -mllvm -enable-merge-functions" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_DEVICE_OPTIONS3 +// SYCL_XARCH_DEVICE_OPTIONS1: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_DEVICE_OPTIONS1-SAME: -fsanitize=address +// SYCL_XARCH_DEVICE_OPTIONS1-SAME: -fsanitize-address-use-after-return=never +// SYCL_XARCH_DEVICE_OPTIONS1-SAME: -fno-sanitize-address-use-after-scope +// SYCL_XARCH_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-instrumentation-with-call-threshold=0" +// SYCL_XARCH_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-stack=0" +// SYCL_XARCH_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-globals=0" +// SYCL_XARCH_DEVICE_OPTIONS2: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_DEVICE_OPTIONS2-SAME: XARCH_DEVICE_TEST +// SYCL_XARCH_DEVICE_OPTIONS3: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_DEVICE_OPTIONS3-SAME: "-mllvm" "-enable-merge-functions" + + +/// test behavior of -Xarch_host with 1 option for SYCL compiler, the flag +/// should be passed to host compilation only. +// RUN: %clangxx -fsycl %s -Xarch_host -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_OPTION +// RUN: %clangxx -fsycl %s -Xarch_host -fsanitize=address -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_ONLY +// SYCL_XARCH_HOST_OPTION: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_HOST_OPTION-SAME: -fsanitize=address +// SYCL_XARCH_HOST_OPTION-SAME: -fsanitize-address-use-after-scope +// SYCL_XARCH_HOST_OPTION-NEXT: libclang_rt.asan +// SYCL_XARCH_HOST_ONLY: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_HOST_ONLY-NOT: -fsanitize=address +// SYCL_XARCH_HOST_ONLY: clang{{.*}} "-fsycl-is-host" + +/// test behavior of -Xarch_host with multiple options for SYCL compiler, the +/// flags should be passed to host compilation only. +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -DXARCH_HOST_TEST -mllvm -enable-merge-functions" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_OPTIONS1 +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -DXARCH_HOST_TEST -mllvm -enable-merge-functions" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_OPTIONS2 +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -DXARCH_HOST_TEST -mllvm -enable-merge-functions" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_OPTIONS3 +// SYCL_XARCH_HOST_OPTIONS1: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_HOST_OPTIONS1-SAME: -fsanitize=address +// SYCL_XARCH_HOST_OPTIONS1-SAME: -fsanitize-address-use-after-scope +// SYCL_XARCH_HOST_OPTIONS2: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_HOST_OPTIONS2-SAME: XARCH_HOST_TEST +// SYCL_XARCH_HOST_OPTIONS3: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_HOST_OPTIONS3-SAME: "-mllvm" "-enable-merge-functions" + +// test behavior of combination of -Xarch_device and -Xarch_device. +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_COM_DEVICE_OPTIONS1 +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_COM_DEVICE_OPTIONS2 +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_COM_NO_DEVICE +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_COM_HOST_OPTIONS1 +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_COM_HOST_OPTIONS2 +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_COM_HOST_OPTIONS3 +// RUN: %clangxx -fsycl %s -Xarch_device "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host "-fsanitize=memory -DUSE_XARCH_HOST -fno-builtin" -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_COM_NO_HOST +// SYCL_XARCH_COM_DEVICE_OPTIONS1: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: -fsanitize=address +// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: -fsanitize-address-use-after-return=never +// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: -fno-sanitize-address-use-after-scope +// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-instrumentation-with-call-threshold=0" +// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-stack=0" +// SYCL_XARCH_COM_DEVICE_OPTIONS1-SAME: "-mllvm" "-asan-globals=0" +// SYCL_XARCH_COM_DEVICE_OPTIONS2: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_COM_DEVICE_OPTIONS2-SAME: "-mllvm" "-enable-merge-functions" +// SYCL_XARCH_COM_NO_DEVICE: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_COM_NO_DEVICE-NOT: USE_XARCH_HOST +// SYCL_XARCH_COM_NO_DEVICE: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_COM_HOST_OPTIONS1: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_COM_HOST_OPTIONS1-SAME: -fsanitize=memory +// SYCL_XARCH_COM_HOST_OPTIONS1-NEXT: libclang_rt.msan +// SYCL_XARCH_COM_HOST_OPTIONS2: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_COM_HOST_OPTIONS2-SAME: USE_XARCH_HOST +// SYCL_XARCH_COM_HOST_OPTIONS3: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_COM_HOST_OPTIONS3-SAME: -fno-builtin +// SYCL_XARCH_COM_NO_HOST: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_COM_NO_HOST-NOT: "-mllvm" "-enable-merge-functions" + + +// test behavior of multiple usage of -Xarch_host in single command line +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_MULTIPLE1 +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_MULTIPLE2 +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_MULTIPLE3 +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_HOST_MULTIPLE4 +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_NO_DEVICE_MULTIPLE1 +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_NO_DEVICE_MULTIPLE2 +// RUN: %clangxx -fsycl %s -Xarch_host "-fsanitize=address -mllvm -enable-merge-functions" \ +// RUN: -Xarch_host -DFOO -Xarch_host -DFOO1 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_XARCH_NO_DEVICE_MULTIPLE3 +// SYCL_XARCH_HOST_MULTIPLE1: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_HOST_MULTIPLE1-SAME: -fsanitize=address +// SYCL_XARCH_HOST_MULTIPLE1-NEXT: libclang_rt.asan +// SYCL_XARCH_HOST_MULTIPLE2: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_HOST_MULTIPLE2-SAME: "-mllvm" "-enable-merge-functions" +// SYCL_XARCH_HOST_MULTIPLE3: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_HOST_MULTIPLE3-SAME: "FOO" +// SYCL_XARCH_HOST_MULTIPLE4: clang{{.*}} "-fsycl-is-host" +// SYCL_XARCH_HOST_MULTIPLE4-SAME: "FOO1" +// SYCL_XARCH_NO_DEVICE_MULTIPLE1: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_NO_DEVICE_MULTIPLE1-NOT: -fsanitize=address +// SYCL_XARCH_NO_DEVICE_MULTIPLE1: llc{{.*}} "-filetype=obj" +// SYCL_XARCH_NO_DEVICE_MULTIPLE2: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_NO_DEVICE_MULTIPLE2-NOT: "-mllvm" "-enable-merge-functions" +// SYCL_XARCH_NO_DEVICE_MULTIPLE2: llc{{.*}} "-filetype=obj" +// SYCL_XARCH_NO_DEVICE_MULTIPLE3: clang{{.*}} "-fsycl-is-device" +// SYCL_XARCH_NO_DEVICE_MULTIPLE3-NOT: "FOO" +// SYCL_XARCH_NO_DEVICE_MULTIPLE3: llc{{.*}} "-filetype=obj" diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 429792f94eb1d..3b6d1cf71f441 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -62,6 +62,11 @@ __spirv_AtomicCompareExchange(int SPIR_GLOBAL *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, __spv::MemorySemanticsMask::Flag, int, int); +extern DEVICE_EXTERNAL int +__spirv_AtomicCompareExchange(int *, __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag, + __spv::MemorySemanticsMask::Flag, int, int); + extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int SPIR_GLOBAL *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag); @@ -70,6 +75,10 @@ extern DEVICE_EXTERNAL void __spirv_AtomicStore(int SPIR_GLOBAL *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, int); +extern DEVICE_EXTERNAL void +__spirv_AtomicStore(int *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, + int); + /// Atomically set the value in *Ptr with Desired if and only if it is Expected /// Return the value which already was in *Ptr static inline int atomicCompareAndSet(SPIR_GLOBAL int *Ptr, int Desired, @@ -80,6 +89,13 @@ static inline int atomicCompareAndSet(SPIR_GLOBAL int *Ptr, int Desired, __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected); } +static inline int atomicCompareAndSet(int *Ptr, int Desired, int Expected) { + return __spirv_AtomicCompareExchange( + Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent, + __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected); +} + static inline int atomicLoad(SPIR_GLOBAL int *Ptr) { return __spirv_AtomicLoad(Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent); @@ -90,4 +106,9 @@ static inline void atomicStore(SPIR_GLOBAL int *Ptr, int V) { __spv::MemorySemanticsMask::SequentiallyConsistent, V); } +static inline void atomicStore(int *Ptr, int V) { + __spirv_AtomicStore(Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent, V); +} + #endif // __SPIR__ diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index f8477c7b5d2d0..1d2e1b4de64f5 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -109,7 +109,7 @@ set(imf_obj_deps device_imf.hpp imf_half.hpp imf_bf16.hpp imf_rounding_op.hpp im set(itt_obj_deps device_itt.h spirv_vars.h device.h sycl-compiler) set(bfloat16_obj_deps sycl-headers sycl-compiler) if (NOT MSVC) - set(sanitizer_obj_deps device.h sycl-compiler) + set(sanitizer_obj_deps device.h atomic.hpp spirv_vars.h include/sanitizer_device_utils.hpp include/spir_global_var.hpp sycl-compiler) endif() add_devicelib_obj(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps}) @@ -126,9 +126,9 @@ add_devicelib_obj(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps}) add_devicelib_obj(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp DEP ${imf_obj_deps}) add_devicelib_obj(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEP ${cmath_obj_deps} ) if(MSVC) -add_devicelib_obj(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps}) + add_devicelib_obj(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps}) else() -add_devicelib_obj(libsycl-sanitizer SRC sanitizer_utils.cpp DEP ${sanitizer_obj_deps}) + add_devicelib_obj(libsycl-sanitizer SRC sanitizer_utils.cpp DEP ${sanitizer_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) endif() add_fallback_devicelib(libsycl-fallback-cassert SRC fallback-cassert.cpp DEP ${crt_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) diff --git a/libdevice/include/device-sanitizer-report.hpp b/libdevice/include/device-sanitizer-report.hpp new file mode 100644 index 0000000000000..477fac6d4a5d4 --- /dev/null +++ b/libdevice/include/device-sanitizer-report.hpp @@ -0,0 +1,58 @@ +//==-- device-sanitizer-report.hpp - Structure and declaration for assert +// support --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +// Treat this header as system one to workaround frontend's restriction +#pragma clang system_header + +#include + +enum class DeviceSanitizerErrorType : int32_t { + UNKNOWN, + OUT_OF_BOUND, + MISALIGNED, + USE_AFTER_FREE, + OUT_OF_SHADOW_BOUND, +}; + +enum class DeviceSanitizerMemoryType : int32_t { + UNKNOWN, + USM_DEVICE, + USM_HOST, + USM_SHARED, + LOCAL, + PRIVATE, + MEM_BUFFER, +}; + +// NOTE Layout of this structure should be aligned with the one in +// sycl/include/sycl/detail/device_sanitizer_report.hpp +struct DeviceSanitizerReport { + int Flag = 0; + + char File[256 + 1] = ""; + char Func[256 + 1] = ""; + + int32_t Line = 0; + + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; + + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; + + bool IsWrite = false; + uint32_t AccessSize = 0; + DeviceSanitizerMemoryType MemoryType = DeviceSanitizerMemoryType::UNKNOWN; + DeviceSanitizerErrorType ErrorType = DeviceSanitizerErrorType::UNKNOWN; + + bool IsRecover = false; +}; diff --git a/libdevice/include/sanitizer_device_utils.hpp b/libdevice/include/sanitizer_device_utils.hpp new file mode 100644 index 0000000000000..ab472d2139abc --- /dev/null +++ b/libdevice/include/sanitizer_device_utils.hpp @@ -0,0 +1,49 @@ +//==-- sanitizer_device_utils.hpp - Declaration for sanitizer global var ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "spir_global_var.hpp" +#include + +// Treat this header as system one to workaround frontend's restriction +#pragma clang system_header + +template +class +#ifdef __SYCL_DEVICE_ONLY__ + [[__sycl_detail__::global_variable_allowed, __sycl_detail__::device_global, + __sycl_detail__::add_ir_attributes_global_variable( + "sycl-device-global-size", "sycl-device-image-scope", sizeof(T), + nullptr)]] +#endif + DeviceGlobal { +public: + DeviceGlobal() = default; + DeviceGlobal(const DeviceGlobal &) = delete; + DeviceGlobal(const DeviceGlobal &&) = delete; + DeviceGlobal &operator=(const DeviceGlobal &) = delete; + DeviceGlobal &operator=(const DeviceGlobal &&) = delete; + + DeviceGlobal &operator=(const T newValue) noexcept { + val = newValue; + return *this; + } + + operator T &() noexcept { return val; } + + operator const T &() const noexcept { return val; } + + T &get() noexcept { return val; } + + const T &get() const noexcept { return val; } + +private: + T val; +}; + +enum DeviceType : uintptr_t { UNKNOWN, CPU, GPU_PVC, GPU_DG2 }; diff --git a/libdevice/include/spir_global_var.hpp b/libdevice/include/spir_global_var.hpp new file mode 100644 index 0000000000000..4bf1cb8daefae --- /dev/null +++ b/libdevice/include/spir_global_var.hpp @@ -0,0 +1,26 @@ +//==- spir_global_var.hpp - Declaration for device global variable support -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +// Treat this header as system one to workaround frontend's restriction +#pragma clang system_header + +#ifndef SPIR_GLOBAL_VAR +#ifdef __SYCL_DEVICE_ONLY__ +#define SPIR_GLOBAL_VAR __attribute__((sycl_global_var)) +#else +#warning "SPIR_GLOBAL_VAR not defined in host mode. Defining as empty macro." +#define SPIR_GLOBAL_VAR +#endif +#endif + +#define __SYCL_GLOBAL__ __attribute__((opencl_global)) +#define __SYCL_LOCAL__ __attribute__((opencl_local)) +#define __SYCL_PRIVATE__ __attribute__((opencl_private)) +#define __SYCL_CONSTANT__ __attribute__((opencl_constant)) diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp index c0d4cbba112c3..3e4fd11ec811c 100644 --- a/libdevice/sanitizer_utils.cpp +++ b/libdevice/sanitizer_utils.cpp @@ -6,22 +6,525 @@ // //===----------------------------------------------------------------------===// +#include "atomic.hpp" #include "device.h" +#include "spirv_vars.h" + +#include "include/device-sanitizer-report.hpp" +#include "include/sanitizer_device_utils.hpp" + +#include #include -using uptr = uint64_t; +using uptr = uintptr_t; +using s8 = char; +using u8 = unsigned char; +using s16 = short; +using u16 = unsigned short; + +#define ASAN_SHADOW_SCALE 3 +#define ASAN_SHADOW_GRANULARITY (1ULL << ASAN_SHADOW_SCALE) + +DeviceGlobal __AsanShadowMemoryGlobalStart; +DeviceGlobal __AsanShadowMemoryGlobalEnd; +DeviceGlobal __AsanShadowMemoryLocalStart; +DeviceGlobal __AsanShadowMemoryLocalEnd; + +DeviceGlobal __DeviceSanitizerReportMem; + +DeviceGlobal __DeviceType; + #if defined(__SPIR__) -// TODO: add real implementation in __asan_load_n. -DEVICE_EXTERN_C_NOINLINE -void __asan_load_n(uptr addr, unsigned n) { - (void)addr; - (void)n; - return; + +#ifdef __SYCL_DEVICE_ONLY__ +extern SYCL_EXTERNAL int +__spirv_ocl_printf(const __SYCL_CONSTANT__ char *Format, ...); + +extern SYCL_EXTERNAL __SYCL_GLOBAL__ void * +__spirv_GenericCastToPtrExplicit_ToGlobal(void *, int); +extern SYCL_EXTERNAL __SYCL_LOCAL__ void * +__spirv_GenericCastToPtrExplicit_ToLocal(void *, int); +extern SYCL_EXTERNAL __SYCL_PRIVATE__ void * +__spirv_GenericCastToPtrExplicit_ToPrivate(void *, int); +#endif + +// These magic values are written to shadow for better error +// reporting. +const int kUsmDeviceRedzoneMagic = (char)0x81; +const int kUsmHostRedzoneMagic = (char)0x82; +const int kUsmSharedRedzoneMagic = (char)0x83; +const int kMemBufferRedzoneMagic = (char)0x84; + +const int kUsmDeviceDeallocatedMagic = (char)0x91; +const int kUsmHostDeallocatedMagic = (char)0x92; +const int kUsmSharedDeallocatedMagic = (char)0x93; + +const int kSharedLocalRedzoneMagic = (char)0xa1; + +// Same with Asan Stack +const int kPrivateLeftRedzoneMagic = (char)0xf1; +const int kPrivateMidRedzoneMagic = (char)0xf2; +const int kPrivateRightRedzoneMagic = (char)0xf3; + +static const __SYCL_CONSTANT__ char __asan_shadow_value_start[] = + "%p(%d) -> %p:"; +static const __SYCL_CONSTANT__ char __asan_shadow_value[] = " %02X"; +static const __SYCL_CONSTANT__ char __asan_current_shadow_value[] = ">%02X"; +static const __SYCL_CONSTANT__ char __newline[] = "\n"; + +static const __SYCL_CONSTANT__ char __global_shadow_out_of_bound[] = + "ERROR: Global shadow memory out-of-bound (ptr: %p -> %p, base: %p)\n"; +static const __SYCL_CONSTANT__ char __local_shadow_out_of_bound[] = + "ERROR: Local shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: " + "%p)\n"; + +static const __SYCL_CONSTANT__ char __unsupport_device_type[] = + "ERROR: Unsupport device type: %d\n"; + +#define ASAN_REPORT_NONE 0 +#define ASAN_REPORT_START 1 +#define ASAN_REPORT_FINISH 2 + +#define AS_PRIVATE 0 +#define AS_GLOBAL 1 +#define AS_CONSTANT 2 +#define AS_LOCAL 3 +#define AS_GENERIC 4 + +namespace { + +__SYCL_GLOBAL__ void *ToGlobal(void *ptr) { + return __spirv_GenericCastToPtrExplicit_ToGlobal(ptr, 5); +} +__SYCL_LOCAL__ void *ToLocal(void *ptr) { + return __spirv_GenericCastToPtrExplicit_ToLocal(ptr, 4); +} +__SYCL_PRIVATE__ void *ToPrivate(void *ptr) { + return __spirv_GenericCastToPtrExplicit_ToPrivate(ptr, 7); +} + +inline uptr MemToShadow_CPU(uptr addr, int32_t as) { + return __AsanShadowMemoryGlobalStart + (addr >> 3); +} + +inline uptr MemToShadow_DG2(uptr addr, int32_t as) { + uptr shadow_ptr = 0; + if (addr & (~0xffffffffffff)) { + shadow_ptr = + (((addr & 0xffffffffffff) >> 3) + __AsanShadowMemoryGlobalStart) | + (~0xffffffffffff); + } else { + shadow_ptr = (addr >> 3) + __AsanShadowMemoryGlobalStart; + } + + if (shadow_ptr > __AsanShadowMemoryGlobalEnd) { + __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr); + } + + return shadow_ptr; +} + +inline uptr MemToShadow_PVC(uptr addr, int32_t as) { + uptr shadow_ptr = 0; + + if (as == AS_GENERIC) { + if ((shadow_ptr = (uptr)ToGlobal((void *)addr))) { + as = AS_GLOBAL; + } else if ((shadow_ptr = (uptr)ToPrivate((void *)addr))) { + as = AS_PRIVATE; + } else if ((shadow_ptr = (uptr)ToLocal((void *)addr))) { + as = AS_LOCAL; + } else { + return 0; + } + } + + if (as == AS_PRIVATE) { // private + } else if (as == AS_GLOBAL) { // global + if (addr & 0xFF00000000000000) { // Device USM + shadow_ptr = __AsanShadowMemoryGlobalStart + 0x200000000000 + + ((addr & 0xFFFFFFFFFFFF) >> 3); + } else { // Only consider 47bit VA + shadow_ptr = + __AsanShadowMemoryGlobalStart + ((addr & 0x7FFFFFFFFFFF) >> 3); + } + + if (shadow_ptr > __AsanShadowMemoryGlobalEnd) { + __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr, + (uptr)__AsanShadowMemoryGlobalStart); + shadow_ptr = 0; + } + } else if (as == AS_CONSTANT) { // constant + } else if (as == AS_LOCAL) { // local + // The size of SLM is 128KB on PVC + constexpr unsigned slm_size = 128 * 1024; + const auto wg_lid = + __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y * + __spirv_BuiltInNumWorkgroups.z + + __spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z + + __spirv_BuiltInWorkgroupId.z; + + shadow_ptr = __AsanShadowMemoryLocalStart + ((wg_lid * slm_size) >> 3) + + ((addr & (slm_size - 1)) >> 3); + + if (shadow_ptr > __AsanShadowMemoryLocalEnd) { + __spirv_ocl_printf(__local_shadow_out_of_bound, addr, shadow_ptr, wg_lid, + (uptr)__AsanShadowMemoryLocalStart); + shadow_ptr = 0; + } + } + + return shadow_ptr; +} + +inline uptr MemToShadow(uptr addr, int32_t as) { + uptr shadow_ptr = 0; + + if (__DeviceType == DeviceType::CPU) { + shadow_ptr = MemToShadow_CPU(addr, as); + } else if (__DeviceType == DeviceType::GPU_PVC) { + shadow_ptr = MemToShadow_PVC(addr, as); + } else { + __spirv_ocl_printf(__unsupport_device_type, (int)__DeviceType); + return shadow_ptr; + } + + return shadow_ptr; +} + +inline constexpr uptr RoundUpTo(uptr size, uptr boundary) { + return (size + boundary - 1) & ~(boundary - 1); +} + +inline constexpr uptr RoundDownTo(uptr x, uptr boundary) { + return x & ~(boundary - 1); +} + +bool MemIsZero(const char *beg, uptr size) { + const char *end = beg + size; + uptr *aligned_beg = (uptr *)RoundUpTo((uptr)beg, sizeof(uptr)); + uptr *aligned_end = (uptr *)RoundDownTo((uptr)end, sizeof(uptr)); + uptr all = 0; + // Prologue. + for (const char *mem = beg; mem < (char *)aligned_beg && mem < end; mem++) + all |= *mem; + // Aligned loop. + for (; aligned_beg < aligned_end; aligned_beg++) + all |= *aligned_beg; + // Epilogue. + if ((char *)aligned_end >= beg) { + for (const char *mem = (char *)aligned_end; mem < end; mem++) + all |= *mem; + } + return all == 0; } -DEVICE_EXTERN_C_NOINLINE -void __asan_load4(uptr addr) { __asan_load_n(addr, 4); } +void print_shadow_memory(uptr addr, int32_t as) { + uptr shadow_address = MemToShadow(addr, as); + uptr p = shadow_address & (~0xf); + __spirv_ocl_printf(__asan_shadow_value_start, addr, as, p); + for (int i = 0; i < 0xf; ++i) { + u8 shadow_value = *(u8 *)(p + i); + if (p + i == shadow_address) { + __spirv_ocl_printf(__asan_current_shadow_value, shadow_value); + } else { + __spirv_ocl_printf(__asan_shadow_value, shadow_value); + } + } + __spirv_ocl_printf(__newline); +} + +} // namespace + +bool __asan_region_is_value(uptr addr, int32_t as, std::size_t size, + char value) { + if (size == 0) + return true; + while (size--) { + char *shadow = (char *)MemToShadow(addr, as); + if (*shadow != value) { + return false; + } + ++addr; + } + return true; +} + +static void __asan_internal_report_save( + uptr ptr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, + const char __SYCL_CONSTANT__ *func, bool is_write, uint32_t access_size, + DeviceSanitizerMemoryType memory_type, DeviceSanitizerErrorType error_type, + bool is_recover = false) { + + const int Expected = ASAN_REPORT_NONE; + int Desired = ASAN_REPORT_START; + if (atomicCompareAndSet(&__DeviceSanitizerReportMem.get().Flag, Desired, + Expected) == Expected) { + + int FileLength = 0; + int FuncLength = 0; + + if (file) + for (auto *C = file; *C != '\0'; ++C, ++FileLength) + ; + if (func) + for (auto *C = func; *C != '\0'; ++C, ++FuncLength) + ; + + int MaxFileIdx = sizeof(__DeviceSanitizerReportMem.get().File) - 1; + int MaxFuncIdx = sizeof(__DeviceSanitizerReportMem.get().Func) - 1; + + if (FileLength < MaxFileIdx) + MaxFileIdx = FileLength; + if (FuncLength < MaxFuncIdx) + MaxFuncIdx = FuncLength; + + for (int Idx = 0; Idx < MaxFileIdx; ++Idx) + __DeviceSanitizerReportMem.get().File[Idx] = file[Idx]; + __DeviceSanitizerReportMem.get().File[MaxFileIdx] = '\0'; + + for (int Idx = 0; Idx < MaxFuncIdx; ++Idx) + __DeviceSanitizerReportMem.get().Func[Idx] = func[Idx]; + __DeviceSanitizerReportMem.get().Func[MaxFuncIdx] = '\0'; + + __DeviceSanitizerReportMem.get().Line = line; + __DeviceSanitizerReportMem.get().GID0 = __spirv_GlobalInvocationId_x(); + __DeviceSanitizerReportMem.get().GID1 = __spirv_GlobalInvocationId_y(); + __DeviceSanitizerReportMem.get().GID2 = __spirv_GlobalInvocationId_z(); + __DeviceSanitizerReportMem.get().LID0 = __spirv_LocalInvocationId_x(); + __DeviceSanitizerReportMem.get().LID1 = __spirv_LocalInvocationId_y(); + __DeviceSanitizerReportMem.get().LID2 = __spirv_LocalInvocationId_z(); + + __DeviceSanitizerReportMem.get().IsWrite = is_write; + __DeviceSanitizerReportMem.get().AccessSize = access_size; + __DeviceSanitizerReportMem.get().ErrorType = error_type; + __DeviceSanitizerReportMem.get().MemoryType = memory_type; + __DeviceSanitizerReportMem.get().IsRecover = is_recover; + + // Show we've done copying + atomicStore(&__DeviceSanitizerReportMem.get().Flag, ASAN_REPORT_FINISH); + } +} + +/// +/// ASAN Error Reporters +/// + +void __asan_report_access_error(uptr addr, int32_t as, size_t size, + bool is_write, uptr poisoned_addr, + const char __SYCL_CONSTANT__ *file, + int32_t line, + const char __SYCL_CONSTANT__ *func, + bool is_recover = false) { + // Check Error Type + s8 *shadow_address = (s8 *)MemToShadow(poisoned_addr, as); + int shadow_value = *shadow_address; + if (shadow_value > 0) { + shadow_value = *(shadow_address + 1); + } + // FIXME: check if shadow_address out-of-bound + + DeviceSanitizerMemoryType memory_type; + DeviceSanitizerErrorType error_type; + + switch (shadow_value) { + case kUsmDeviceRedzoneMagic: + memory_type = DeviceSanitizerMemoryType::USM_DEVICE; + error_type = DeviceSanitizerErrorType::OUT_OF_BOUND; + break; + case kUsmHostRedzoneMagic: + memory_type = DeviceSanitizerMemoryType::USM_HOST; + error_type = DeviceSanitizerErrorType::OUT_OF_BOUND; + break; + case kUsmSharedRedzoneMagic: + memory_type = DeviceSanitizerMemoryType::USM_SHARED; + error_type = DeviceSanitizerErrorType::OUT_OF_BOUND; + break; + case kUsmDeviceDeallocatedMagic: + memory_type = DeviceSanitizerMemoryType::USM_DEVICE; + error_type = DeviceSanitizerErrorType::USE_AFTER_FREE; + break; + case kUsmHostDeallocatedMagic: + memory_type = DeviceSanitizerMemoryType::USM_HOST; + error_type = DeviceSanitizerErrorType::USE_AFTER_FREE; + break; + case kUsmSharedDeallocatedMagic: + memory_type = DeviceSanitizerMemoryType::USM_SHARED; + error_type = DeviceSanitizerErrorType::USE_AFTER_FREE; + break; + case kPrivateLeftRedzoneMagic: + case kPrivateMidRedzoneMagic: + case kPrivateRightRedzoneMagic: + memory_type = DeviceSanitizerMemoryType::PRIVATE; + error_type = DeviceSanitizerErrorType::OUT_OF_BOUND; + break; + case kMemBufferRedzoneMagic: + memory_type = DeviceSanitizerMemoryType::MEM_BUFFER; + error_type = DeviceSanitizerErrorType::OUT_OF_BOUND; + break; + case kSharedLocalRedzoneMagic: + memory_type = DeviceSanitizerMemoryType::LOCAL; + error_type = DeviceSanitizerErrorType::OUT_OF_BOUND; + break; + default: + memory_type = DeviceSanitizerMemoryType::UNKNOWN; + error_type = DeviceSanitizerErrorType::UNKNOWN; + } + + __asan_internal_report_save(addr, as, file, line, func, is_write, size, + memory_type, error_type, is_recover); +} + +/// +/// Check if memory is poisoned +/// + +// NOTE: size < 8 +inline int __asan_address_is_poisoned(uptr a, int32_t as, size_t size) { + auto *shadow_address = (s8 *)MemToShadow(a, as); + if (shadow_address) { + auto shadow_value = *shadow_address; + if (shadow_value) { + s8 last_accessed_byte = (a & (ASAN_SHADOW_GRANULARITY - 1)) + size - 1; + return (last_accessed_byte >= shadow_value); + } + } + return false; +} + +// NOTE: size = 1 +inline int __asan_address_is_poisoned(uptr a, int32_t as) { + return __asan_address_is_poisoned(a, as, 1); +} + +inline uptr __asan_region_is_poisoned(uptr beg, int32_t as, size_t size) { + if (!size) + return 0; + + uptr end = beg + size; + uptr aligned_b = RoundUpTo(beg, ASAN_SHADOW_GRANULARITY); + uptr aligned_e = RoundDownTo(end, ASAN_SHADOW_GRANULARITY); + + uptr shadow_beg = MemToShadow(aligned_b, as); + if (!shadow_beg) { + return 0; + } + uptr shadow_end = MemToShadow(aligned_e, as); + if (!shadow_end) { + return 0; + } + + // First check the first and the last application bytes, + // then check the ASAN_SHADOW_GRANULARITY-aligned region by calling + // MemIsZero on the corresponding shadow. + if (!__asan_address_is_poisoned(beg, as) && + !__asan_address_is_poisoned(end - 1, as) && + (shadow_end <= shadow_beg || + MemIsZero((const char *)shadow_beg, shadow_end - shadow_beg))) + return 0; + + // The fast check failed, so we have a poisoned byte somewhere. + // Find it slowly. + for (; beg < end; beg++) + if (__asan_address_is_poisoned(beg, as)) + return beg; + + return 0; +} + +/// +/// ASAN Load/Store Report Built-ins +/// + +#define ASAN_REPORT_ERROR(type, is_write, size) \ + DEVICE_EXTERN_C_NOINLINE void __asan_##type##size( \ + uptr addr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, \ + const char __SYCL_CONSTANT__ *func) { \ + if (__asan_address_is_poisoned(addr, as, size)) { \ + __asan_report_access_error(addr, as, size, is_write, addr, file, line, \ + func); \ + } \ + } \ + DEVICE_EXTERN_C_NOINLINE void __asan_##type##size##_noabort( \ + uptr addr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, \ + const char __SYCL_CONSTANT__ *func) { \ + if (__asan_address_is_poisoned(addr, as, size)) { \ + __asan_report_access_error(addr, as, size, is_write, addr, file, line, \ + func, true); \ + } \ + } + +ASAN_REPORT_ERROR(load, false, 1) +ASAN_REPORT_ERROR(load, false, 2) +ASAN_REPORT_ERROR(load, false, 4) +ASAN_REPORT_ERROR(store, true, 1) +ASAN_REPORT_ERROR(store, true, 2) +ASAN_REPORT_ERROR(store, true, 4) + +#define ASAN_REPORT_ERROR_BYTE(type, is_write, size) \ + DEVICE_EXTERN_C_NOINLINE void __asan_##type##size( \ + uptr addr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, \ + const char __SYCL_CONSTANT__ *func) { \ + u##size *shadow_address = (u##size *)MemToShadow(addr, as); \ + if (shadow_address && *shadow_address) { \ + __asan_report_access_error(addr, as, size, is_write, addr, file, line, \ + func); \ + } \ + } \ + DEVICE_EXTERN_C_NOINLINE void __asan_##type##size##_noabort( \ + uptr addr, int32_t as, const char __SYCL_CONSTANT__ *file, int32_t line, \ + const char __SYCL_CONSTANT__ *func) { \ + u##size *shadow_address = (u##size *)MemToShadow(addr, as); \ + if (shadow_address && *shadow_address) { \ + __asan_report_access_error(addr, as, size, is_write, addr, file, line, \ + func, true); \ + } \ + } + +ASAN_REPORT_ERROR_BYTE(load, false, 8) +ASAN_REPORT_ERROR_BYTE(load, false, 16) +ASAN_REPORT_ERROR_BYTE(store, true, 8) +ASAN_REPORT_ERROR_BYTE(store, true, 16) + +#define ASAN_REPORT_ERROR_N(type, is_write) \ + DEVICE_EXTERN_C_NOINLINE void __asan_##type##N( \ + uptr addr, size_t size, int32_t as, const char __SYCL_CONSTANT__ *file, \ + int32_t line, const char __SYCL_CONSTANT__ *func) { \ + if (auto poisoned_addr = __asan_region_is_poisoned(addr, as, size)) { \ + __asan_report_access_error(addr, as, size, is_write, poisoned_addr, \ + file, line, func); \ + } \ + } \ + DEVICE_EXTERN_C_NOINLINE void __asan_##type##N_noabort( \ + uptr addr, size_t size, int32_t as, const char __SYCL_CONSTANT__ *file, \ + int32_t line, const char __SYCL_CONSTANT__ *func) { \ + if (auto poisoned_addr = __asan_region_is_poisoned(addr, as, size)) { \ + __asan_report_access_error(addr, as, size, is_write, poisoned_addr, \ + file, line, func, true); \ + } \ + } + +ASAN_REPORT_ERROR_N(load, false) +ASAN_REPORT_ERROR_N(store, true) + +DEVICE_EXTERN_C_NOINLINE void +__asan_set_shadow_local_memory(uptr ptr, size_t size, + size_t size_with_redzone) { + uptr aligned_size = RoundUpTo(size, ASAN_SHADOW_GRANULARITY); + + { + auto shadow_address = MemToShadow(ptr + aligned_size, AS_LOCAL); + auto count = (size_with_redzone - aligned_size) / ASAN_SHADOW_GRANULARITY; + for (size_t i = 0; i < count; ++i) { + ((u8 *)shadow_address)[i] = kSharedLocalRedzoneMagic; + } + } + + if (size != aligned_size) { + auto user_end = ptr + size - 1; + auto *shadow_end = (s8 *)MemToShadow(user_end, AS_LOCAL); + *shadow_end = user_end - RoundDownTo(user_end, ASAN_SHADOW_GRANULARITY); + } +} -DEVICE_EXTERN_C_NOINLINE -void __asan_load8(uptr addr) { __asan_load_n(addr, 8); } -#endif // __SPIR__ +#endif diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index 640f0dbf3cd9e..0387c322965ce 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -21,6 +21,7 @@ typedef size_t size_t_vec __attribute__((ext_vector_type(3))); __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; __SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInNumWorkgroups; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8a2864a078731..22080d30e6c57 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -727,6 +727,13 @@ struct AddressSanitizer { bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool maybeInsertDynamicShadowAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); + void instrumentSyclAllocateLocalMemory(CallInst *CI); + + GlobalVariable *GetOrCreateGlobalString(Module &M, StringRef Name, + StringRef Value, + unsigned AddressSpace); + void AppendDebugInfoToArgs(Instruction *InsertBefore, Value *Addr, + SmallVectorImpl &Args); private: friend struct FunctionStackPoisoner; @@ -768,7 +775,10 @@ struct AddressSanitizer { ShadowMapping Mapping; FunctionCallee AsanHandleNoReturnFunc; FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction; + FunctionCallee AsanSetShadowDeviceLocalFunc; Constant *AsanShadowGlobal; + Constant *AsanShadowDevicePrivate; + StringMap GlobalStringMap; // These arrays is indexed by AccessIsWrite, Experiment and log2(AccessSize). FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes]; @@ -1234,7 +1244,78 @@ static bool isUnsupportedAMDGPUAddrspace(Value *Addr) { return false; } +static bool isUnsupportedSPIRAccess(Value *Addr, Function *Func) { + Type *PtrTy = cast(Addr->getType()->getScalarType()); + // Private address space: skip kernel arguments + if (PtrTy->getPointerAddressSpace() == 0) { + return Func->getCallingConv() == CallingConv::SPIR_KERNEL && + isa(Addr); + } + + // All the rest address spaces: skip SPIR-V built-in varibles + auto *OrigValue = Addr->stripPointerCasts(); + return OrigValue->getName().starts_with("__spirv_BuiltIn"); +} + +GlobalVariable *AddressSanitizer::GetOrCreateGlobalString( + Module &M, StringRef Name, StringRef Value, unsigned AddressSpace) { + GlobalVariable *StringGV = nullptr; + if (GlobalStringMap.find(Value.str()) != GlobalStringMap.end()) + return GlobalStringMap.at(Value.str()); + + auto *Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), Value.size() + 1); + StringGV = new GlobalVariable( + M, Ty, true, GlobalValue::InternalLinkage, + ConstantDataArray::getString(M.getContext(), Value), Name, nullptr, + GlobalValue::NotThreadLocal, AddressSpace); + GlobalStringMap[Value.str()] = StringGV; + + return StringGV; +} + +void AddressSanitizer::AppendDebugInfoToArgs(Instruction *InsertBefore, + Value *Addr, + SmallVectorImpl &Args) { + auto *M = InsertBefore->getModule(); + auto &C = InsertBefore->getContext(); + auto &Loc = InsertBefore->getDebugLoc(); + + // SPIR constant address space + constexpr unsigned ConstantAS = 2; + PointerType *ConstASPtrTy = Type::getInt8Ty(C)->getPointerTo(ConstantAS); + + // Address Space + Type *PtrTy = cast(Addr->getType()->getScalarType()); + Args.push_back( + ConstantInt::get(Type::getInt32Ty(C), PtrTy->getPointerAddressSpace())); + + // File & Line + if (Loc) { + StringRef FileName = Loc->getFilename(); + auto *FileNameGV = + GetOrCreateGlobalString(*M, "__asan_file", FileName, ConstantAS); + Args.push_back(ConstantExpr::getPointerCast(FileNameGV, ConstASPtrTy)); + Args.push_back(ConstantInt::get(Type::getInt32Ty(C), Loc.getLine())); + } else { + Args.push_back(ConstantPointerNull::get(ConstASPtrTy)); + Args.push_back(ConstantInt::get(Type::getInt32Ty(C), 0)); + } + + // Function + auto FuncName = InsertBefore->getFunction()->getName(); + auto *FuncNameGV = GetOrCreateGlobalString(*M, "__asan_func", + demangle(FuncName), ConstantAS); + Args.push_back(ConstantExpr::getPointerCast(FuncNameGV, ConstASPtrTy)); +} + Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { + if (TargetTriple.isSPIR()) { + // ((Shadow & 0xffffffff) >> 3) + __AsanShadowMemoryPrivateStart; + Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, 0xffffffff)); + Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); + Value *ShadowBase = IRB.CreateLoad(IntptrTy, AsanShadowDevicePrivate); + return IRB.CreateAdd(Shadow, ShadowBase); + } // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); if (Mapping.Offset == 0) return Shadow; @@ -1250,6 +1331,53 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { return IRB.CreateAdd(Shadow, ShadowBase); } +static uint64_t getSizeAndRedzoneSizeForLocal(uint64_t Size, + uint64_t Granularity, + uint64_t Alignment) { + uint64_t Res = 0; + if (Size <= 4) + Res = 16; + else if (Size <= 16) + Res = 32; + else if (Size <= 128) + Res = Size + 32; + else if (Size <= 512) + Res = Size + 64; + else if (Size <= 4096) + Res = Size + 128; + else + Res = Size + 256; + return alignTo(std::max(Res, 2 * Granularity), Alignment); +} + +// Instument __sycl_allocateLocalMemory +void AddressSanitizer::instrumentSyclAllocateLocalMemory(CallInst *CI) { + InstrumentationIRBuilder IRB(CI->getNextNode()); + auto *Size = cast(CI->getArgOperand(0)); + auto *Alignment = cast(CI->getArgOperand(1)); + + const auto Granularity = 1 << Mapping.Scale; + // The base address of local memory needs to align to granularity + const auto Align = alignTo(Alignment->getZExtValue(), Granularity); + + auto *SizeWithRedZone = ConstantInt::get( + IntptrTy, getSizeAndRedzoneSizeForLocal(Size->getZExtValue(), Granularity, + Alignment->getZExtValue())); + + auto *NewCI = + IRB.CreateCall(CI->getCalledFunction(), + {SizeWithRedZone, ConstantInt::get(IntptrTy, Align)}); + + /// __asan_set_shadow_local_memory(uptr beg, size_t size, size_t + /// size_with_redzone) + IRB.CreateCall( + AsanSetShadowDeviceLocalFunc, + {IRB.CreatePointerCast(NewCI, IntptrTy), Size, SizeWithRedZone}); + + CI->replaceAllUsesWith(NewCI); + CI->eraseFromParent(); +} + // Instrument memset/memmove/memcpy void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { InstrumentationIRBuilder IRB(MI); @@ -1295,11 +1423,18 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { } bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { - // Instrument accesses from different address spaces only for AMDGPU. - Type *PtrTy = cast(Ptr->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0 && - !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr))) - return true; + // SPIR has its own rules to filter the instrument accesses + if (TargetTriple.isSPIR()) { + if (isUnsupportedSPIRAccess(Ptr, Inst->getFunction())) + return true; + } else { + // Instrument accesses from different address spaces only for AMDGPU. + Type *PtrTy = cast(Ptr->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() != 0 && + !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr))) { + return true; + } + } // Ignore swifterror addresses. // swifterror memory addresses are mem2reg promoted by instruction @@ -1778,12 +1913,21 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); if (UseCalls) { - if (Exp == 0) - IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], - AddrLong); - else + if (Exp == 0) { + if (TargetTriple.isSPIR()) { + SmallVector Args; + Args.push_back(AddrLong); + AppendDebugInfoToArgs(InsertBefore, Addr, Args); + IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], + Args); + } else { + IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], + AddrLong); + } + } else { IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)}); + } return; } @@ -1849,10 +1993,18 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment( Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); if (UseCalls) { - if (Exp == 0) - IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0], - {AddrLong, Size}); - else + if (Exp == 0) { + if (TargetTriple.isSPIR()) { + SmallVector Args; + Args.push_back(AddrLong); + Args.push_back(Size); + AppendDebugInfoToArgs(InsertBefore, Addr, Args); + IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0], Args); + } else { + IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0], + {AddrLong, Size}); + } + } else IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1], {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)}); } else { @@ -2667,6 +2819,12 @@ bool ModuleAddressSanitizer::instrumentModule(Module &M) { } } + // SPIR kernel needn't AsanCtorFunction & AsanDtorFunction + if (TargetTriple.isSPIR()) { + AsanCtorFunction = nullptr; + AsanDtorFunction = nullptr; + } + const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple); // Put the constructor and destructor in comdat if both @@ -2714,6 +2872,23 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T AL1 = AL1.addParamAttribute(*C, 1, AK); } } + + // Extend __asan_load/store arguments: unsigned int address_space, char* + // file, unsigned int line, char* func + if (TargetTriple.isSPIR()) { + constexpr unsigned ConstantAS = 2; + auto *Int8PtrTy = Type::getInt8Ty(*C)->getPointerTo(ConstantAS); + + Args1.push_back(Type::getInt32Ty(*C)); // address_space + Args1.push_back(Int8PtrTy); // file + Args1.push_back(Type::getInt32Ty(*C)); // line + Args1.push_back(Int8PtrTy); // func + + Args2.push_back(Type::getInt32Ty(*C)); // address_space + Args2.push_back(Int8PtrTy); // file + Args2.push_back(Type::getInt32Ty(*C)); // line + Args2.push_back(Int8PtrTy); // func + } AsanErrorCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr, FunctionType::get(IRB.getVoidTy(), Args2, false), AL2); @@ -2761,6 +2936,20 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T AsanShadowGlobal = M.getOrInsertGlobal("__asan_shadow", ArrayType::get(IRB.getInt8Ty(), 0)); + if (TargetTriple.isSPIR()) { + AsanShadowDevicePrivate = + M.getOrInsertGlobal("__AsanShadowMemoryPrivateStart", IntptrTy, [&] { + return new GlobalVariable(M, IntptrTy, true, + GlobalVariable::ExternalLinkage, nullptr, + "__AsanShadowMemoryPrivateStart", nullptr, + GlobalVariable::NotThreadLocal, 1); + }); + + AsanSetShadowDeviceLocalFunc = + M.getOrInsertFunction("__asan_set_shadow_local_memory", IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy); + } + AMDGPUAddressShared = M.getOrInsertFunction(kAMDGPUAddressSharedName, IRB.getInt1Ty(), PtrTy); AMDGPUAddressPrivate = @@ -2856,6 +3045,8 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false; if (F.getName().starts_with("__asan_")) return false; + if (F.getName().contains("__sycl_service_kernel__")) + return false; bool FunctionModified = false; @@ -2891,6 +3082,7 @@ bool AddressSanitizer::instrumentFunction(Function &F, SmallVector NoReturnCalls; SmallVector AllBlocks; SmallVector PointerComparisonsOrSubtracts; + SmallVector SyclAllocateLocalMemoryCalls; // Fill the set of memory operations to instrument. for (auto &BB : F) { @@ -2939,8 +3131,16 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (CB->doesNotReturn()) NoReturnCalls.push_back(CB); } - if (CallInst *CI = dyn_cast(&Inst)) - maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); + if (CallInst *CI = dyn_cast(&Inst)) { + if (TargetTriple.isSPIR() && CI->getCalledFunction() && + CI->getCalledFunction()->getCallingConv() == + llvm::CallingConv::SPIR_FUNC && + CI->getCalledFunction()->getName() == + "__sycl_allocateLocalMemory") + SyclAllocateLocalMemoryCalls.push_back(CI); + else + maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); + } } if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; } @@ -2962,10 +3162,17 @@ bool AddressSanitizer::instrumentFunction(Function &F, F.getParent()->getDataLayout()); FunctionModified = true; } - for (auto *Inst : IntrinToInstrument) { - if (!suppressInstrumentationSiteForDebug(NumInstrumented)) - instrumentMemIntrinsic(Inst); - FunctionModified = true; + if (TargetTriple.isSPIR()) { + for (auto *CI : SyclAllocateLocalMemoryCalls) { + instrumentSyclAllocateLocalMemory(CI); + FunctionModified = true; + } + } else { + for (auto *Inst : IntrinToInstrument) { + if (!suppressInstrumentationSiteForDebug(NumInstrumented)) + instrumentMemIntrinsic(Inst); + FunctionModified = true; + } } FunctionStackPoisoner FSP(F, *this); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3721564890ddb..956a2e940e96b 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -294,6 +294,10 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { if (Name == "memset" || Name == "memcpy") return false; + // Prevent from asan interception in kernel + if (Name == "__asan_set_shadow_local_memory") + return false; + // Determine if code size heuristics need to be applied. ApplyCodeSizeHeuristics = L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs; diff --git a/llvm/test/Instrumentation/AddressSanitizer/spir.ll b/llvm/test/Instrumentation/AddressSanitizer/spir.ll new file mode 100644 index 0000000000000..cee6a67605c5d --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/spir.ll @@ -0,0 +1,156 @@ +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -S | FileCheck %s + +; ModuleID = 'spir.cpp' +source_filename = "spir.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [1 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } + +$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E10FillBuffer = comdat any + +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +; CHECK: __AsanShadowMemoryPrivateStart + +; Function Attrs: convergent mustprogress norecurse nounwind sanitize_address uwtable +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E10FillBuffer(ptr addrspace(1) noundef align 8 %_arg_Sum, ptr addrspace(1) noundef align 8 %_arg_Accessor, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_Accessor1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_Accessor2, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_Accessor3) local_unnamed_addr #0 comdat !srcloc !65 !kernel_arg_buffer_location !66 !kernel_arg_runtime_aligned !67 !kernel_arg_exclusive_ptr !67 !sycl_fixed_targets !68 { +entry: + %0 = load i64, ptr %_arg_Accessor3, align 8 + %1 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !68 + %call.i10 = tail call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 noundef 16, i64 noundef 4) #5, !noalias !69 + ; CHECK: __asan_set_shadow_local_memory + %cmp.i = icmp eq i64 %1, 0 + br i1 %cmp.i, label %if.then.i, label %_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_.exit + +if.then.i: ; preds = %entry + call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %call.i10, i8 0, i64 16, i1 false), !noalias !69 + br label %_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_.exit + +_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_.exit: ; preds = %entry, %if.then.i + %add.ptr.i = getelementptr inbounds i64, ptr addrspace(1) %_arg_Accessor, i64 %0 + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) #5, !noalias !69 + %cmp.i14 = icmp ult i64 %1, 2147483648 + tail call void @llvm.assume(i1 %cmp.i14) + %arrayidx.i16 = getelementptr inbounds i64, ptr addrspace(1) %add.ptr.i, i64 %1 + %2 = load i64, ptr addrspace(1) %arrayidx.i16, align 8, !tbaa !72 + ; CHECK: __asan_load8 + %arrayidx.i = getelementptr inbounds [4 x i32], ptr addrspace(3) %call.i10, i64 0, i64 %1 + %3 = load i32, ptr addrspace(3) %arrayidx.i, align 4, !tbaa !76 + ; CHECK: __asan_load4 + %conv.i = sext i32 %3 to i64 + %add.i = add i64 %2, %conv.i + %4 = load i64, ptr addrspace(1) %_arg_Sum, align 8, !tbaa !72 + ; CHECK: __asan_load8 + %add5.i = add i64 %4, %add.i + store i64 %add5.i, ptr addrspace(1) %_arg_Sum, align 8, !tbaa !72 + ret void +} + +; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p4.i64(ptr addrspace(4) nocapture writeonly %0, i8 %1, i64 %2, i1 immarg %3) #1 + +; Function Attrs: convergent nounwind +declare dso_local spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 noundef %0, i64 noundef %1) local_unnamed_addr #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef %0, i32 noundef %1, i32 noundef %2) local_unnamed_addr #2 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef %0) #3 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p3.i64(ptr addrspace(3) nocapture writeonly %0, i8 %1, i64 %2, i1 immarg %3) #4 + +attributes #0 = { convergent mustprogress norecurse nounwind sanitize_address uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "uniform-work-group-size"="true" } +attributes #1 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #5 = { convergent nounwind } + +!llvm.module.flags = !{!0, !1, !2} +!opencl.spir.version = !{!3} +!spirv.Source = !{!4} +!sycl_aspects = !{!5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57, !58, !59, !60, !61, !62, !63} +!llvm.ident = !{!64} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 2} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 1, i32 2} +!4 = !{i32 4, i32 100000} +!5 = !{!"cpu", i32 1} +!6 = !{!"gpu", i32 2} +!7 = !{!"accelerator", i32 3} +!8 = !{!"custom", i32 4} +!9 = !{!"fp16", i32 5} +!10 = !{!"fp64", i32 6} +!11 = !{!"image", i32 9} +!12 = !{!"online_compiler", i32 10} +!13 = !{!"online_linker", i32 11} +!14 = !{!"queue_profiling", i32 12} +!15 = !{!"usm_device_allocations", i32 13} +!16 = !{!"usm_host_allocations", i32 14} +!17 = !{!"usm_shared_allocations", i32 15} +!18 = !{!"usm_system_allocations", i32 17} +!19 = !{!"ext_intel_pci_address", i32 18} +!20 = !{!"ext_intel_gpu_eu_count", i32 19} +!21 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!22 = !{!"ext_intel_gpu_slices", i32 21} +!23 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!24 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!25 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!26 = !{!"ext_intel_mem_channel", i32 25} +!27 = !{!"usm_atomic_host_allocations", i32 26} +!28 = !{!"usm_atomic_shared_allocations", i32 27} +!29 = !{!"atomic64", i32 28} +!30 = !{!"ext_intel_device_info_uuid", i32 29} +!31 = !{!"ext_oneapi_srgb", i32 30} +!32 = !{!"ext_oneapi_native_assert", i32 31} +!33 = !{!"host_debuggable", i32 32} +!34 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!35 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!36 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!37 = !{!"ext_intel_free_memory", i32 36} +!38 = !{!"ext_intel_device_id", i32 37} +!39 = !{!"ext_intel_memory_clock_rate", i32 38} +!40 = !{!"ext_intel_memory_bus_width", i32 39} +!41 = !{!"emulated", i32 40} +!42 = !{!"ext_intel_legacy_image", i32 41} +!43 = !{!"ext_oneapi_bindless_images", i32 42} +!44 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!45 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!46 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!47 = !{!"ext_oneapi_interop_memory_import", i32 46} +!48 = !{!"ext_oneapi_interop_memory_export", i32 47} +!49 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!50 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!51 = !{!"ext_oneapi_mipmap", i32 50} +!52 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!53 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!54 = !{!"ext_intel_esimd", i32 53} +!55 = !{!"ext_oneapi_ballot_group", i32 54} +!56 = !{!"ext_oneapi_fixed_size_group", i32 55} +!57 = !{!"ext_oneapi_opportunistic_group", i32 56} +!58 = !{!"ext_oneapi_tangle_group", i32 57} +!59 = !{!"int64_base_atomics", i32 7} +!60 = !{!"int64_extended_atomics", i32 8} +!61 = !{!"usm_system_allocator", i32 17} +!62 = !{!"usm_restricted_shared_allocations", i32 16} +!63 = !{!"host", i32 0} +!64 = !{!"clang version 18.0.0git (https://github.com/intel/llvm.git caecf6b928648a83c8ceb84988231cb246c4365e)"} +!65 = !{i32 419} +!66 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!67 = !{i1 false, i1 true, i1 false, i1 false, i1 false} +!68 = !{} +!69 = !{!70} +!70 = distinct !{!70, !71, !"_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_: %agg.result"} +!71 = distinct !{!71, !"_ZN4sycl3_V13ext6oneapi18group_local_memoryIA4_iNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS8_LNS0_6access13address_spaceE3ELNSB_9decoratedE2EEEE4typeES9_DpOT1_"} +!72 = !{!73, !73, i64 0} +!73 = !{!"long", !74, i64 0} +!74 = !{!"omnipotent char", !75, i64 0} +!75 = !{!"Simple C++ TBAA"} +!76 = !{!77, !77, i64 0} +!77 = !{!"int", !74, i64 0} diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 50cb225f6cb18..85728be8aabf0 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -313,8 +313,9 @@ std::vector getKernelNamesUsingAssert(const Module &M) { } bool isModuleUsingAsan(const Module &M) { - auto *AsanInitFunction = M.getFunction("__asan_init"); - return AsanInitFunction; + return llvm::any_of(M.functions(), [](const Function &F) { + return F.getName().starts_with("__asan_"); + }); } // Gets reqd_work_group_size information for function Func. diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index dadccc099e016..16a950af3151a 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -973,7 +973,7 @@ int main() { syclex::if_architecture_is([&]{ // Code for PVC }).otherwise([&]{ - // Fallback code + // Generic code }); }); @@ -987,7 +987,7 @@ int main() { syclex::architecture::amd_gpu_gfx1013>([&]{ // Code for AMD devices between gfx1010 and gfx1013 (inclusive) }).otherwise([&]{ - // Fallback code + // Generic code }); }); } diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_if.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_if.asciidoc index 8d2b31b3ecd32..73c9c5a9ac9bc 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_if.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_device_if.asciidoc @@ -152,7 +152,7 @@ executes the `if_device_has` function has **all** of the aspects listed in this pack. If the condition is `true`, the implementation calls `fn`. Otherwise, the function `fn` is potentially discarded as described below. -=== Fallback code +=== Generic code The value returned by `if_device_has` is an object _F_ of an unspecified type, which provides the following member functions: @@ -200,7 +200,7 @@ void frob() { }).else_if_device_has([] { // code that uses features tied to "bar" aspect }).otherwise([] { - // fallback code that works on all devices + // generic code that works on all devices }); } ``` diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 31c6a86f3300d..0c02a7705b51d 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -151,9 +151,10 @@ // 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM // 15.43 Changed the signature of piextMemGetNativeHandle to also take a // pi_device +// 15.44 Add coarse-grain memory advice flag for HIP. #define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 43 +#define _PI_H_VERSION_MINOR 44 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -582,6 +583,8 @@ typedef enum { PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST = 1 << 7, PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST = 1 << 8, PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST = 1 << 9, + PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED = 1 << 10, + PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED = 1 << 11, PI_MEM_ADVICE_UNKNOWN = 0x7FFFFFFF, } _pi_mem_advice; diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 7de174f03701c..ad28f40d21edd 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -57,14 +57,14 @@ endif() if(SYCL_PI_UR_USE_FETCH_CONTENT) include(FetchContent) - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime") - # commit 9363574db721d2388c7d76a10edb128764872352 - # Merge: 553a6b82 5e513738 + set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + # commit cd97e172cbbfc411fccb0b80e0fff6f9126574f4 + # Merge: bd745d10 2a9ded6f # Author: Kenneth Benzie (Benie) - # Date: Thu Feb 1 11:50:16 2024 +0000 - # Merge pull request #1302 from kbenzie/benie/cl-binary-type-intermediate - # [CL] Handle INTERMEDIATE binary type - set(UNIFIED_RUNTIME_TAG 9363574db721d2388c7d76a10edb128764872352) + # Date: Fri Feb 2 14:24:16 2024 +0000 + # Merge pull request #1249 from GeorgeWeb/georgi/hip_memadvise_coarse_grained + # [HIP] Implement coarse-grained memory advice for the HIP adapter + set(UNIFIED_RUNTIME_TAG cd97e172cbbfc411fccb0b80e0fff6f9126574f4) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") @@ -203,6 +203,13 @@ endif() if ("opencl" IN_LIST SYCL_ENABLE_PLUGINS) add_dependencies(sycl-runtime-libraries ur_adapter_opencl) + + # Install the UR adapters too + install(TARGETS ur_adapter_opencl + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev + RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev + ) endif() if ("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index c19c93a6af53a..4f0c7a8a1f885 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -3375,6 +3375,12 @@ inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST) { UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST; } + if (Advice & PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED) { + UrAdvice |= UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY; + } + if (Advice & PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED) { + UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY; + } if (Advice & PI_MEM_ADVICE_RESET) { UrAdvice |= UR_USM_ADVICE_FLAG_DEFAULT; } diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 92bccc2cea2b2..4dbeaccc9baf2 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -685,45 +685,39 @@ static uint16_t getELFHeaderType(const unsigned char *ImgData, size_t ImgSize) { sycl::detail::pi::PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize) { // Top-level magic numbers for the recognized binary image formats. - struct { - sycl::detail::pi::PiDeviceBinaryType Fmt; - const uint32_t Magic; - } Fmts[] = {{PI_DEVICE_BINARY_TYPE_SPIRV, 0x07230203}, - {PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE, 0xDEC04342}, - // 'I', 'N', 'T', 'C' ; Intel native - {PI_DEVICE_BINARY_TYPE_NATIVE, 0x43544E49}}; - - if (ImgSize >= sizeof(Fmts[0].Magic)) { - std::remove_const_t Hdr = 0; - std::copy(ImgData, ImgData + sizeof(Hdr), reinterpret_cast(&Hdr)); - - // Check headers for direct formats. - for (const auto &Fmt : Fmts) { - if (Hdr == Fmt.Magic) - return Fmt.Fmt; - } - - // ELF e_type for recognized binary image formats. - struct { - sycl::detail::pi::PiDeviceBinaryType Fmt; - const uint16_t Magic; - } ELFFmts[] = {{PI_DEVICE_BINARY_TYPE_NATIVE, 0xFF04}, // OpenCL executable - {PI_DEVICE_BINARY_TYPE_NATIVE, 0xFF12}}; // ZEBIN executable - - // ELF files need to be parsed separately. The header type ends after 18 - // bytes. - if (Hdr == 0x464c457F && ImgSize >= 18) { - uint16_t HdrType = getELFHeaderType(ImgData, ImgSize); - for (const auto &ELFFmt : ELFFmts) { - if (HdrType == ELFFmt.Magic) - return ELFFmt.Fmt; - } - // Newer ZEBIN format does not have a special header type, but can instead - // be identified by having a required .ze_info section. - if (checkELFSectionPresent(".ze_info", ImgData, ImgSize)) - return PI_DEVICE_BINARY_TYPE_NATIVE; - } + auto MatchMagicNumber = [&](auto Number) { + return ImgSize >= sizeof(Number) && + std::memcmp(ImgData, &Number, sizeof(Number)) == 0; + }; + + if (MatchMagicNumber(uint32_t{0x07230203})) + return PI_DEVICE_BINARY_TYPE_SPIRV; + + if (MatchMagicNumber(uint32_t{0xDEC04342})) + return PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; + + if (MatchMagicNumber(uint32_t{0x43544E49})) + // 'I', 'N', 'T', 'C' ; Intel native + return PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE; + + // Check for ELF format, size requirements include data we'll read in case of + // succesful match. + if (ImgSize >= 18 && MatchMagicNumber(uint32_t{0x464c457F})) { + uint16_t ELFHdrType = getELFHeaderType(ImgData, ImgSize); + if (ELFHdrType == 0xFF04) + // OpenCL executable. + return PI_DEVICE_BINARY_TYPE_NATIVE; + + if (ELFHdrType == 0xFF12) + // ZEBIN executable. + return PI_DEVICE_BINARY_TYPE_NATIVE; + + // Newer ZEBIN format does not have a special header type, but can instead + // be identified by having a required .ze_info section. + if (checkELFSectionPresent(".ze_info", ImgData, ImgSize)) + return PI_DEVICE_BINARY_TYPE_NATIVE; } + return PI_DEVICE_BINARY_TYPE_NONE; } diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp new file mode 100644 index 0000000000000..84e28fd6e0883 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_char.cpp @@ -0,0 +1,40 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +#include + +int main() { + sycl::queue Q; + constexpr std::size_t N = 12345; +#if defined(MALLOC_HOST) + auto *array = sycl::malloc_host(N, Q); +#elif defined(MALLOC_SHARED) + auto *array = sycl::malloc_shared(N, Q); +#elif defined(MALLOC_DEVICE) + auto *array = sycl::malloc_device(N, Q); +#elif defined(MALLOC_SYSTEM) + auto *array = new char[N]; +#else +#error "Must provide malloc type to run the test" +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(N + 1, 1), + [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; }); + }); + Q.wait(); + // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory + // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory + // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory + // CHECK: {{READ of size 1 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(12345, 0, 0\)}} + // CHECK: {{ #0 .* .*parallel_for_char.cpp:}}[[@LINE-7]] + + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp new file mode 100644 index 0000000000000..6644d6ba81f06 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_double.cpp @@ -0,0 +1,40 @@ +// REQUIRES: linux, cpu, aspect-fp64 +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +#include + +int main() { + sycl::queue Q; + constexpr std::size_t N = 123456; +#if defined(MALLOC_HOST) + auto *array = sycl::malloc_host(N, Q); +#elif defined(MALLOC_SHARED) + auto *array = sycl::malloc_shared(N, Q); +#elif defined(MALLOC_DEVICE) + auto *array = sycl::malloc_device(N, Q); +#elif defined(MALLOC_SYSTEM) + auto *array = new double[N]; +#else +#error "Must provide malloc type to run the test" +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(N + 1, 1), + [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; }); + }); + Q.wait(); + // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory + // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory + // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory + // CHECK: {{READ of size 8 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(123456, 0, 0\)}} + // CHECK: {{ #0 .* .*parallel_for_double.cpp:}}[[@LINE-7]] + + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp new file mode 100644 index 0000000000000..631effebe4515 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_func.cpp @@ -0,0 +1,42 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +#include + +__attribute__((noinline)) void foo(int *array, size_t i) { array[i] = 1; } +// CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory +// CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory +// CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory +// CHECK: {{WRITE of size 4 at kernel <.*MyKernel> LID\(0, 0, 0\) GID\(123, 0, 0\)}} +// CHECK: {{ #0 foo\(int\*, unsigned long\) .*parallel_for_func.cpp:}}[[@LINE-5]] + +int main() { + sycl::queue Q; + constexpr std::size_t N = 123; +#if defined(MALLOC_HOST) + auto *array = sycl::malloc_host(N, Q); +#elif defined(MALLOC_SHARED) + auto *array = sycl::malloc_shared(N, Q); +#elif defined(MALLOC_DEVICE) + auto *array = sycl::malloc_device(N, Q); +#elif defined(MALLOC_SYSTEM) + auto *array = new int[N]; +#else +#error "Must provide malloc type to run the test" +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(N + 1, 1), + [=](sycl::nd_item<1> item) { foo(array, item.get_global_id(0)); }); + }); + Q.wait(); + + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp new file mode 100644 index 0000000000000..5fef6ec395bfc --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_int.cpp @@ -0,0 +1,40 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +#include + +int main() { + sycl::queue Q; + constexpr std::size_t N = 1234567; +#if defined(MALLOC_HOST) + auto *array = sycl::malloc_host(N, Q); +#elif defined(MALLOC_SHARED) + auto *array = sycl::malloc_shared(N, Q); +#elif defined(MALLOC_DEVICE) + auto *array = sycl::malloc_device(N, Q); +#elif defined(MALLOC_SYSTEM) + auto *array = new int[N]; +#else +#error "Must provide malloc type to run the test" +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(N + 1, 1), + [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; }); + }); + Q.wait(); + // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory + // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory + // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory + // CHECK: {{READ of size 4 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(1234567, 0, 0\)}} + // CHECK: {{ #0 .* .*parallel_for_int.cpp:}}[[@LINE-7]] + + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp new file mode 100644 index 0000000000000..85f81619f858f --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_for_short.cpp @@ -0,0 +1,40 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_DEVICE -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-DEVICE --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_HOST -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-HOST --input-file %t.txt %s +// RUN: %{build} %device_sanitizer_flags -DMALLOC_SHARED -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s +#include + +int main() { + sycl::queue Q; + constexpr std::size_t N = 123456789; +#if defined(MALLOC_HOST) + auto *array = sycl::malloc_host(N, Q); +#elif defined(MALLOC_SHARED) + auto *array = sycl::malloc_shared(N, Q); +#elif defined(MALLOC_DEVICE) + auto *array = sycl::malloc_device(N, Q); +#elif defined(MALLOC_SYSTEM) + auto *array = new short[N]; +#else +#error "Must provide malloc type to run the test" +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(N + 1, 1), + [=](sycl::nd_item<1> item) { ++array[item.get_global_id(0)]; }); + }); + Q.wait(); + // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on USM Device Memory + // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on USM Host Memory + // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on USM Shared Memory + // CHECK: {{READ of size 2 at kernel <.*MyKernelR_4> LID\(0, 0, 0\) GID\(123456789, 0, 0\)}} + // CHECK: {{ #0 .* .*parallel_for_short.cpp:}}[[@LINE-7]] + + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local-overflow-1.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local-overflow-1.cpp new file mode 100644 index 0000000000000..b8278b209aa6a --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/local/local-overflow-1.cpp @@ -0,0 +1,29 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_sanitizer_flags -g -o %t.out +// RUN: env SYCL_PREFER_UR=1 ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run-unfiltered-devices} not %t.out 2>&1 | FileCheck %s +#include + +constexpr std::size_t N = 16; +constexpr std::size_t group_size = 8; + +int main() { + sycl::queue Q; + auto *data = sycl::malloc_host(1, Q); + + Q.submit([&](sycl::handler &h) { + h.parallel_for( + sycl::nd_range<1>(N, group_size), [=](sycl::nd_item<1> item) { + sycl::multi_ptr + ptr = sycl::ext::oneapi::group_local_memory( + item.get_group()); + auto &ref = *ptr; + ref[item.get_local_linear_id() * 2 + 4] = 42; + // CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Local Memory + // CHECK: {{WRITE of size 4 at kernel <.*MyKernel> LID\(6, 0, 0\) GID\(.*, 0, 0\)}} + // CHECK: {{ #0 .* .*local-overflow-1.cpp:}}[[@LINE-3]] + }); + }); + + Q.wait(); + return 0; +} diff --git a/sycl/test-e2e/Assert/assert_in_multiple_tus.cpp b/sycl/test-e2e/Assert/assert_in_multiple_tus.cpp index a1d40ad0489a1..062f5952236ad 100644 --- a/sycl/test-e2e/Assert/assert_in_multiple_tus.cpp +++ b/sycl/test-e2e/Assert/assert_in_multiple_tus.cpp @@ -3,6 +3,9 @@ // https://github.com/intel/llvm/issues/7634 // UNSUPPORTED: hip // +// https://github.com/intel/llvm/issues/8832 +// UNSUPPORTED: cuda +// // FIXME: Remove XFAIL one intel/llvm#11364 is resolved // XFAIL: (opencl && gpu) diff --git a/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp b/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp index e5db927f33e11..1bf2ecdc98418 100644 --- a/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp +++ b/sycl/test-e2e/Assert/assert_in_multiple_tus_one_ndebug.cpp @@ -3,6 +3,9 @@ // https://github.com/intel/llvm/issues/7634 // UNSUPPORTED: hip // +// https://github.com/intel/llvm/issues/8832 +// UNSUPPORTED: cuda +// // FIXME: Remove XFAIL one intel/llvm#11364 is resolved // XFAIL: (opencl && gpu) diff --git a/sycl/test-e2e/FilterSelector/filter_list_cpu_gpu_acc.cpp b/sycl/test-e2e/FilterSelector/filter_list_cpu_gpu_acc.cpp index ab3154380fe07..5e9022054cb33 100644 --- a/sycl/test-e2e/FilterSelector/filter_list_cpu_gpu_acc.cpp +++ b/sycl/test-e2e/FilterSelector/filter_list_cpu_gpu_acc.cpp @@ -10,14 +10,14 @@ // RUN: %clangxx -fsycl %S/Inputs/filter_list_queries.cpp -o %t.out -// RUN: env ONEAPI_DEVICE_SELECTOR="*:acc" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-ONLY +// RUN: env ONEAPI_DEVICE_SELECTOR="*:fpga" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-ONLY // RUN: env ONEAPI_DEVICE_SELECTOR="*:gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-GPU-ONLY // RUN: env ONEAPI_DEVICE_SELECTOR="*:cpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-CPU-ONLY // -// RUN: env ONEAPI_DEVICE_SELECTOR="*:acc,gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-GPU -// RUN: env ONEAPI_DEVICE_SELECTOR="*:acc,cpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-CPU +// RUN: env ONEAPI_DEVICE_SELECTOR="*:fpga,gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-GPU +// RUN: env ONEAPI_DEVICE_SELECTOR="*:fpga,cpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-CPU // -// RUN: env ONEAPI_DEVICE_SELECTOR="*:cpu,acc,gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-GPU-CPU +// RUN: env ONEAPI_DEVICE_SELECTOR="*:cpu,fpga,gpu" %{run-unfiltered-devices} %t.out | FileCheck %s --check-prefixes=CHECK-ACC-GPU-CPU // // CHECK-ACC-ONLY: Device: acc // CHECK-ACC-ONLY-NOT: Device: cpu diff --git a/sycl/test-e2e/FilterSelector/select_device.cpp b/sycl/test-e2e/FilterSelector/select_device.cpp index 7516545ab59fd..ddb725b0523a4 100644 --- a/sycl/test-e2e/FilterSelector/select_device.cpp +++ b/sycl/test-e2e/FilterSelector/select_device.cpp @@ -4,7 +4,7 @@ // RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu %{run-unfiltered-devices} %t.out // RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run-unfiltered-devices} %t.out // RUN: env ONEAPI_DEVICE_SELECTOR='*:cpu;level_zero:gpu' %{run-unfiltered-devices} %t.out -// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:acc %{run-unfiltered-devices} %t.out +// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:fpga %{run-unfiltered-devices} %t.out // // Checks if only specified device types can be acquired from select_device // when ONEAPI_DEVICE_SELECTOR is set @@ -45,7 +45,7 @@ int main() { device d = cs.select_device(); } if (!envVal || forcedPIs == "*" || - forcedPIs.find("acc") != std::string::npos) { + forcedPIs.find("fpga") != std::string::npos) { accelerator_selector as; device d = as.select_device(); } diff --git a/sycl/test-e2e/FilterSelector/select_device_acc.cpp b/sycl/test-e2e/FilterSelector/select_device_acc.cpp index 38d97652f8441..f5b94b535bbd4 100644 --- a/sycl/test-e2e/FilterSelector/select_device_acc.cpp +++ b/sycl/test-e2e/FilterSelector/select_device_acc.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: env ONEAPI_DEVICE_SELECTOR='*:acc' %{run-unfiltered-devices} %t.out +// RUN: env ONEAPI_DEVICE_SELECTOR='*:fpga' %{run-unfiltered-devices} %t.out // // Checks if only specified device types can be acquired from select_device // when ONEAPI_DEVICE_SELECTOR is set diff --git a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_several_targets.cpp b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_several_targets.cpp index 6dcc4690880d6..042874b90290c 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_several_targets.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_several_targets.cpp @@ -3,6 +3,6 @@ // RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga,spir64_gen -Xsycl-target-backend "-device *" %S/Inputs/is_compatible_with_env.cpp -o %t.out // RUN: env ONEAPI_DEVICE_SELECTOR=opencl:cpu %{run} not %t.out -// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:acc %{run} %t.out +// RUN: env ONEAPI_DEVICE_SELECTOR=opencl:fpga %{run} %t.out // RUN: env ONEAPI_DEVICE_SELECTOR=opencl:gpu %{run} %t.out // RUN: env ONEAPI_DEVICE_SELECTOR=level_zero:gpu %{run} %t.out diff --git a/sycl/test-e2e/Sampler/basic-rw.cpp b/sycl/test-e2e/Sampler/basic-rw.cpp index 4e5aae87de067..f73ce7e391ea0 100644 --- a/sycl/test-e2e/Sampler/basic-rw.cpp +++ b/sycl/test-e2e/Sampler/basic-rw.cpp @@ -15,8 +15,8 @@ ONEAPI_DEVICE_SELECTOR=level_zero:gpu ./binx.bin ONEAPI_DEVICE_SELECTOR=opencl:cpu ./binx.bin - ONEAPI_DEVICE_SELECTOR=opecl:acc ../binx.bin <-- does not support image - operations at this time. + ONEAPI_DEVICE_SELECTOR=opencl:fpga ../binx.bin <-- does not support + image operations at this time. */ diff --git a/sycl/test-e2e/USM/memadvise_flags.cpp b/sycl/test-e2e/USM/memadvise_flags.cpp index 542ee7a1ea7a2..d7c28a71b5ca4 100755 --- a/sycl/test-e2e/USM/memadvise_flags.cpp +++ b/sycl/test-e2e/USM/memadvise_flags.cpp @@ -59,6 +59,8 @@ int main() { valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION_HOST); valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_ACCESSED_BY_HOST); valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY_HOST); + valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED); + valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED); } else { // Skip return 0; diff --git a/sycl/test-e2e/USM/memory_coherency_hip.cpp b/sycl/test-e2e/USM/memory_coherency_hip.cpp new file mode 100644 index 0000000000000..dec0182812c70 --- /dev/null +++ b/sycl/test-e2e/USM/memory_coherency_hip.cpp @@ -0,0 +1,147 @@ +// RUN: %{build} -o %t1.out +// REQUIRES: hip_amd +// RUN: %{run} %t1.out + +//==---- memory_coherency_hip.cpp -----------------------------------------==// +// USM coarse/fine grain memory coherency test for the HIP-AMD backend. +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include + +namespace kernels { +class SquareKrnl final { + int *mPtr; + +public: + SquareKrnl(int *ptr) : mPtr{ptr} {} + + void operator()(sycl::id<1>) const { *mPtr = (*mPtr) * (*mPtr); } +}; + +class CoherencyTestKrnl final { + int *mPtr; + +public: + CoherencyTestKrnl(int *ptr) : mPtr{ptr} {} + + void operator()(sycl::id<1>) const { + auto atm = sycl::atomic_ref(mPtr[0]); + + // mPtr was initialized to 1 by the host, now set it to 2. + atm.fetch_add(1); + + // spin until mPtr is 3, then change it to 4. + int expected{3}; + int old = atm.load(); + while (true) { + old = atm.load(); + if (old == expected) { + if (atm.compare_exchange_strong(old, 4)) { + break; + } + } + } + } +}; +} // namespace kernels + +int main() { + sycl::queue q{}; + sycl::device dev = q.get_device(); + sycl::context ctx = q.get_context(); + if (!dev.get_info()) { + std::cout << "Shared USM is not supported. Skipping test.\n"; + return 0; + } + + bool coherent{false}; + + int *ptr = sycl::malloc_shared(1, q); + + // Coherency test 1 + // + // The following test validates if memory access is fine with memory allocated + // using malloc_managed() and COARSE_GRAINED advice set via mem_advise(). + // + // Coarse grained memory is only guaranteed to be coherent outside of GPU + // kernels that modify it. Changes applied to coarse-grained memory by a GPU + // kernel are only visible to the rest of the system (CPU or other GPUs) when + // the kernel has completed. A GPU kernel is only guaranteed to see changes + // applied to coarse grained memory by the rest of the system (CPU or other + // GPUs) if those changes were made before the kernel launched. + + // Hint to use coarse-grain memory. + q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED}); + + int init_val{9}; + int expected{init_val * init_val}; + + *ptr = init_val; + q.parallel_for(sycl::range{1}, kernels::SquareKrnl{ptr}); + // Synchronise the underlying stream. + q.wait(); + + // Check if caches are flushed correctly and same memory is between devices. + if (*ptr == expected) { + coherent = true; + } else { + std::cerr << "Coherency test failed. Value: " << *ptr + << " (expected: " << expected << ")\n"; + coherent = false; + } + + // Coherency test 2 + // + // The following test validates if fine-grain behavior is observed or not with + // memory allocated using malloc_managed(). + // + // Fine grained memory allows CPUs and GPUs to synchronize (via atomics) and + // coherently communicate with each other while the GPU kernel is running. + + // Hint to use fine-grain memory. + q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED}); + + init_val = 1; + expected = 4; + + *ptr = init_val; + q.parallel_for(sycl::range{1}, kernels::CoherencyTestKrnl{ptr}); + + // wait until ptr is 2 from the kernel (or 3 seconds), then increment to 3. + while (*ptr == 2) { + using std::chrono_literals::operator""s; + std::this_thread::sleep_for(3s); + break; + } + *ptr += 1; + + // Synchronise the underlying stream. + q.wait(); + + // Check if caches are flushed correctly and same memory is between devices. + if (*ptr == expected) { + coherent &= true; + } else { + std::cerr << "Coherency test failed. Value: " << *ptr + << " (expected: " << expected << ")\n"; + coherent = false; + } + + // Cleanup + sycl::free(ptr, q); + + // Check if all coherency tests passed. + assert(coherent); + // The above assert won't trigger with NDEBUG, so ensure the right exit code. + return coherent ? 0 : 1; +} diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 8c35fdfa2b65b..e1b2b4a1ef28d 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -686,3 +686,7 @@ lit_config.maxIndividualTestTime = 600 except ImportError: pass + +config.substitutions.append( + ("%device_sanitizer_flags", "-Xsycl-target-frontend -fsanitize=address") +) diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp index 3c6b95c1eb4af..d004a37bcc610 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_class.cpp @@ -32,7 +32,7 @@ // UNSUPPORTED: hip || (windows && level_zero) -// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out // RUN: %{run} %t.out #include @@ -41,8 +41,8 @@ #include "../common.hpp" #include "atomic_fixt.hpp" -constexpr size_t numBlocks = 64; -constexpr size_t numThreads = 256; +constexpr size_t numBlocks = 1; +constexpr size_t numThreads = 1; constexpr size_t numData = 6; template diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp index 6b31bcc626ee0..4ccc67fbff53e 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp @@ -32,7 +32,7 @@ // UNSUPPORTED: hip -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_70 %} %s -o %t.out // RUN: %{run} %t.out #include diff --git a/sycl/unittests/pi/CMakeLists.txt b/sycl/unittests/pi/CMakeLists.txt index 0c78c9a634010..861fc41069c7e 100644 --- a/sycl/unittests/pi/CMakeLists.txt +++ b/sycl/unittests/pi/CMakeLists.txt @@ -1,9 +1,7 @@ set(CMAKE_CXX_EXTENSIONS OFF) add_sycl_unittest(PiTests OBJECT - EnqueueMemTest.cpp PiMock.cpp - PlatformTest.cpp PiUtility.cpp pi_arguments_handler.cpp piInteropRetain.cpp @@ -13,11 +11,3 @@ add_dependencies(PiTests sycl) target_include_directories(PiTests PRIVATE SYSTEM ${sycl_inc_dir}) target_include_directories(PiTests PRIVATE ${sycl_src_dir}/../tools/xpti_helpers) -if("cuda" IN_LIST SYCL_ENABLE_PLUGINS) - add_subdirectory(cuda) -endif() - -if("hip" IN_LIST SYCL_ENABLE_PLUGINS) - add_subdirectory(hip) -endif() - diff --git a/sycl/unittests/pi/EnqueueMemTest.cpp b/sycl/unittests/pi/EnqueueMemTest.cpp deleted file mode 100644 index d6439654f7bbb..0000000000000 --- a/sycl/unittests/pi/EnqueueMemTest.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//==---- EnqueueMemTest.cpp --- PI unit tests ------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "TestGetPlugin.hpp" -#include -#include -#include - -using namespace sycl; - -namespace { -class EnqueueMemTest : public testing::TestWithParam { -protected: - constexpr static size_t _numElementsX = 8; - constexpr static size_t _numElementsY = 4; - - pi_device _device = nullptr; - pi_context _context = nullptr; - pi_queue _queue = nullptr; - pi_mem _mem = nullptr; - - EnqueueMemTest() = default; - - ~EnqueueMemTest() = default; - - void SetUp() override { - - const detail::PluginPtr &plugin = GetParam(); - - pi_platform platform = nullptr; - ASSERT_EQ((plugin->call_nocheck( - 1, &platform, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_DEFAULT, 1, &_device, nullptr)), - PI_SUCCESS); - - pi_result result = PI_ERROR_INVALID_VALUE; - result = plugin->call_nocheck( - nullptr, 1u, &_device, nullptr, nullptr, &_context); - ASSERT_EQ(result, PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - _context, _device, 0, &_queue)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - _context, PI_MEM_FLAGS_ACCESS_RW, - _numElementsX * _numElementsY * sizeof(pi_int32), nullptr, - &_mem, nullptr)), - PI_SUCCESS); - } - - void TearDown() override { - - const detail::PluginPtr &plugin = GetParam(); - - ASSERT_EQ((plugin->call_nocheck(_mem)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck(_queue)), - PI_SUCCESS); - ASSERT_EQ( - (plugin->call_nocheck(_context)), - PI_SUCCESS); - } - - template void TestBufferFill(const T &pattern) { - - const detail::PluginPtr &plugin = GetParam(); - - T inValues[_numElementsX] = {}; - - for (size_t i = 0; i < _numElementsX; ++i) { - ASSERT_NE(pattern, inValues[i]); - } - - pi_event event; - ASSERT_EQ((plugin->call_nocheck( - _queue, _mem, PI_TRUE, 0, _numElementsX * sizeof(T), inValues, - 0, nullptr, &event)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - _queue, _mem, &pattern, sizeof(T), 0, sizeof(inValues), 0, - nullptr, &event)), - PI_SUCCESS); - ASSERT_EQ( - (plugin->call_nocheck(1, &event)), - PI_SUCCESS); - - T outValues[_numElementsX] = {}; - ASSERT_EQ((plugin->call_nocheck( - _queue, _mem, PI_TRUE, 0, _numElementsX * sizeof(T), - outValues, 0, nullptr, &event)), - PI_SUCCESS); - - for (size_t i = 0; i < _numElementsX; ++i) { - ASSERT_EQ(pattern, outValues[i]); - } - } -}; - -INSTANTIATE_TEST_SUITE_P( - EnqueueMemTestImpl, EnqueueMemTest, - testing::ValuesIn(pi::initializeAndRemoveInvalid()), - [](const testing::TestParamInfo &info) { - return pi::GetBackendString(info.param); - }); - -template struct vec4 { - T x, y, z, w; - - bool operator==(const vec4 &rhs) const { - return x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w; - } - - bool operator!=(const vec4 &rhs) const { return !(*this == rhs); } -}; - -template struct vec2 { - T x, y; - - bool operator==(const vec2 &rhs) const { return x == rhs.x && y == rhs.y; } - - bool operator!=(const vec2 &rhs) const { return !(*this == rhs); } -}; - -TEST_P(EnqueueMemTest, piEnqueueMemBufferFill) { - - TestBufferFill(float{1}); - TestBufferFill(vec2{1, 2}); - TestBufferFill(vec4{1, 2, 3, 4}); - - TestBufferFill(uint8_t{1}); - TestBufferFill(vec2{1, 2}); - TestBufferFill(vec4{1, 2, 3, 4}); - - TestBufferFill(uint16_t{1}); - TestBufferFill(vec2{1, 2}); - TestBufferFill(vec4{1, 2, 3, 4}); - - TestBufferFill(uint32_t{1}); - TestBufferFill(vec2{1, 2}); - TestBufferFill(vec4{1, 2, 3, 4}); -} -} // namespace diff --git a/sycl/unittests/pi/PlatformTest.cpp b/sycl/unittests/pi/PlatformTest.cpp deleted file mode 100644 index 61834dbb14fff..0000000000000 --- a/sycl/unittests/pi/PlatformTest.cpp +++ /dev/null @@ -1,113 +0,0 @@ -//==---- PlatformTest.cpp --- PI unit tests --------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "TestGetPlugin.hpp" -#include -#include -#include -#include -#include - -namespace { - -using namespace sycl; - -class PlatformTest : public testing::TestWithParam { -protected: - std::vector _platforms; - PlatformTest() : _platforms{} {}; - - ~PlatformTest() override = default; - - void SetUp() override { - - const detail::PluginPtr &plugin = GetParam(); - - ASSERT_NO_FATAL_FAILURE(Test::SetUp()); - - const static char *platform_count_key = "PiPlatformCount"; - - pi_uint32 platform_count = 0u; - - // Initialize the logged number of platforms before the following assertion. - RecordProperty(platform_count_key, platform_count); - - // TODO: Change the test to check this for all plugins present. - // Currently, it is only checking for the first plugin attached. - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &platform_count)), - PI_SUCCESS); - - // Overwrite previous log value with queried number of platforms. - RecordProperty(platform_count_key, platform_count); - - if (platform_count == 0u) { - std::cout << "WARNING: piPlatformsGet does not find any PI platforms.\n"; - - // Do not call into OpenCL below as a platform count of 0 might fail with - // OpenCL implementations if the platforms pointer is not `nullptr`. - return; - } - - _platforms.resize(platform_count, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - _platforms.size(), _platforms.data(), nullptr)), - PI_SUCCESS); - } -}; - -INSTANTIATE_TEST_SUITE_P( - PlatformTestImpl, PlatformTest, - testing::ValuesIn(pi::initializeAndRemoveInvalid()), - [](const testing::TestParamInfo &info) { - return pi::GetBackendString(info.param); - }); - -TEST_P(PlatformTest, piPlatformsGet) { - // The PlatformTest::SetUp method is called to prepare for this test case - // implicitly tests the calls to `piPlatformsGet`. -} - -TEST_P(PlatformTest, piPlatformGetInfo) { - - const detail::PluginPtr &plugin = GetParam(); - - auto get_info_test = [&](pi_platform platform, _pi_platform_info info) { - size_t reported_string_length = 0; - EXPECT_EQ((plugin->call_nocheck( - platform, info, 0u, nullptr, &reported_string_length)), - PI_SUCCESS); - - // Create a larger result string to catch overwrites. - std::vector param_value(reported_string_length * 2u, '\0'); - EXPECT_EQ( - (plugin->call_nocheck( - platform, info, param_value.size(), param_value.data(), nullptr)), - PI_SUCCESS) - << "piPlatformGetInfo for " << detail::pi::platformInfoToString(info) - << " failed.\n"; - - const auto returned_string_length = strlen(param_value.data()) + 1; - - EXPECT_EQ(returned_string_length, reported_string_length) - << "Returned string length " << returned_string_length - << " does not equal reported string length " << reported_string_length - << ".\n"; - }; - - for (const auto &platform : _platforms) { - get_info_test(platform, PI_PLATFORM_INFO_NAME); - get_info_test(platform, PI_PLATFORM_INFO_VENDOR); - get_info_test(platform, PI_PLATFORM_INFO_PROFILE); - get_info_test(platform, PI_PLATFORM_INFO_VERSION); - get_info_test(platform, PI_PLATFORM_INFO_EXTENSIONS); - get_info_test(platform, PI_EXT_PLATFORM_INFO_BACKEND); - } -} -} // namespace diff --git a/sycl/unittests/pi/cuda/CMakeLists.txt b/sycl/unittests/pi/cuda/CMakeLists.txt deleted file mode 100644 index 7808340cc4302..0000000000000 --- a/sycl/unittests/pi/cuda/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -add_sycl_unittest(PiCudaTests OBJECT - test_base_objects.cpp - test_commands.cpp - test_contexts.cpp - test_device.cpp - test_interop_get_native.cpp - test_kernels.cpp - test_mem_obj.cpp - test_primary_context.cpp - test_sampler_properties.cpp -) - -add_dependencies(PiCudaTests sycl) - -target_compile_definitions(PiCudaTests - PRIVATE - GTEST_HAS_COMBINE=1) - -target_include_directories(PiCudaTests - PRIVATE - "../" - "${sycl_inc_dir}/sycl/detail/" - "${sycl_inc_dir}" - "${sycl_plugin_dir}/cuda/" - "${sycl_plugin_dir}/unified_runtime/" -) - -target_link_libraries(PiCudaTests - PRIVATE - cudadrv - UnifiedRuntime-Headers -) diff --git a/sycl/unittests/pi/cuda/CudaUtils.hpp b/sycl/unittests/pi/cuda/CudaUtils.hpp deleted file mode 100644 index f7cb8b40492d3..0000000000000 --- a/sycl/unittests/pi/cuda/CudaUtils.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#pragma once - -#include - -namespace pi { - -// utility function to clear the CUDA context stack -inline void clearCudaContext() { - CUcontext ctxt = nullptr; - do { - cuCtxSetCurrent(nullptr); - cuCtxGetCurrent(&ctxt); - } while (ctxt != nullptr); -} - -} // namespace pi diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp deleted file mode 100644 index d0799a08cfff3..0000000000000 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ /dev/null @@ -1,139 +0,0 @@ -//==---- test_base_objects.cpp --- PI unit tests ---------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlugin.hpp" -#include -#include -#include -#include -#include - -#include - -const unsigned int LATEST_KNOWN_CUDA_DRIVER_API_VERSION = 3020u; - -using namespace sycl; - -class CudaBaseObjectsTest : public ::testing::Test { -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_cuda); - - void SetUp() override { - // skip the tests if the CUDA backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - } - - CudaBaseObjectsTest() = default; - - ~CudaBaseObjectsTest() = default; -}; - -TEST_F(CudaBaseObjectsTest, piContextCreate) { - pi_uint32 numPlatforms = 0; - pi_platform platform = nullptr; - pi_device device; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_GE(numPlatforms, 1u); - ASSERT_NE(platform, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), - PI_SUCCESS) - << "piDevicesGet failed.\n"; - - pi_context ctxt = nullptr; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device, nullptr, nullptr, &ctxt)), - PI_SUCCESS) - << "piContextCreate failed.\n"; - - EXPECT_NE(ctxt, nullptr); - EXPECT_EQ(ctxt->get_device(), device); - - // Retrieve the cuCtxt to check information is correct - CUcontext cudaContext = ctxt->get(); - unsigned int version = 0; - cuCtxGetApiVersion(cudaContext, &version); - EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); - - ASSERT_EQ((plugin->call_nocheck(ctxt)), - PI_SUCCESS); -} - -TEST_F(CudaBaseObjectsTest, piContextCreateChildThread) { - pi_uint32 numPlatforms = 0; - pi_platform platform; - pi_device device; - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), - PI_SUCCESS); - - pi_context ctxt; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device, nullptr, nullptr, &ctxt)), - PI_SUCCESS); - EXPECT_NE(ctxt, nullptr); - - // Retrieve the cuCtxt to check information is correct - auto checkValue = [=]() { - CUcontext cudaContext = ctxt->get(); - unsigned int version = 0; - auto cuErr = cuCtxGetApiVersion(cudaContext, &version); - EXPECT_EQ(cuErr, CUDA_SUCCESS); - EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); - - // The current context is different from the current thread - CUcontext current; - cuErr = cuCtxGetCurrent(¤t); - EXPECT_EQ(cuErr, CUDA_SUCCESS); - EXPECT_NE(cudaContext, current); - - // Set the context from PI API as the current one - cuErr = cuCtxPushCurrent(cudaContext); - EXPECT_EQ(cuErr, CUDA_SUCCESS); - - cuErr = cuCtxGetCurrent(¤t); - EXPECT_EQ(cuErr, CUDA_SUCCESS); - EXPECT_EQ(cudaContext, current); - }; - auto callContextFromOtherThread = std::thread(checkValue); - - callContextFromOtherThread.join(); - - ASSERT_EQ((plugin->call_nocheck(ctxt)), - PI_SUCCESS); -} diff --git a/sycl/unittests/pi/cuda/test_commands.cpp b/sycl/unittests/pi/cuda/test_commands.cpp deleted file mode 100644 index 6c794fe51c899..0000000000000 --- a/sycl/unittests/pi/cuda/test_commands.cpp +++ /dev/null @@ -1,145 +0,0 @@ -//==---- test_commands.cpp --- PI unit tests -------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "CudaUtils.hpp" -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -using namespace sycl; - -struct CudaCommandsTest : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_cuda); - - pi_platform platform_; - pi_device device_; - pi_context context_; - pi_queue queue_; - - void SetUp() override { - // skip the tests if the CUDA backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi::clearCudaContext(); - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - ASSERT_NE(context_, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - context_, device_, 0, &queue_)), - PI_SUCCESS); - ASSERT_NE(queue_, nullptr); - auto tmpCtxt = queue_->get_context(); - ASSERT_EQ(tmpCtxt, context_); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(queue_); - plugin->call(context_); - } - } - - CudaCommandsTest() = default; - - ~CudaCommandsTest() = default; -}; - -TEST_F(CudaCommandsTest, PIEnqueueReadBufferBlocking) { - constexpr const size_t memSize = 10u; - constexpr const size_t bytes = memSize * sizeof(int); - const int data[memSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - int output[memSize] = {}; - - pi_mem memObj; - ASSERT_EQ( - (plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - queue_, memObj, true, 0, bytes, data, 0, nullptr, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - queue_, memObj, true, 0, bytes, output, 0, nullptr, nullptr)), - PI_SUCCESS); - - bool isSame = - std::equal(std::begin(output), std::end(output), std::begin(data)); - EXPECT_TRUE(isSame); - if (!isSame) { - std::for_each(std::begin(output), std::end(output), - [](int &elem) { std::cout << elem << ","; }); - std::cout << std::endl; - } -} - -TEST_F(CudaCommandsTest, PIEnqueueReadBufferNonBlocking) { - constexpr const size_t memSize = 10u; - constexpr const size_t bytes = memSize * sizeof(int); - const int data[memSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - int output[memSize] = {}; - - pi_mem memObj; - ASSERT_EQ( - (plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - pi_event cpIn, cpOut; - ASSERT_EQ((plugin->call_nocheck( - queue_, memObj, false, 0, bytes, data, 0, nullptr, &cpIn)), - PI_SUCCESS); - ASSERT_NE(cpIn, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - queue_, memObj, false, 0, bytes, output, 0, nullptr, &cpOut)), - PI_SUCCESS); - ASSERT_NE(cpOut, nullptr); - - ASSERT_EQ((plugin->call_nocheck(1, &cpOut)), - PI_SUCCESS); - - bool isSame = - std::equal(std::begin(output), std::end(output), std::begin(data)); - EXPECT_TRUE(isSame); - if (!isSame) { - std::for_each(std::begin(output), std::end(output), - [](int &elem) { std::cout << elem << ","; }); - std::cout << std::endl; - } -} diff --git a/sycl/unittests/pi/cuda/test_contexts.cpp b/sycl/unittests/pi/cuda/test_contexts.cpp deleted file mode 100644 index 7113537ebf147..0000000000000 --- a/sycl/unittests/pi/cuda/test_contexts.cpp +++ /dev/null @@ -1,250 +0,0 @@ -//==---- test_contexts.cpp --- PI unit tests -------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include -#include -#include - -#include - -#include "CudaUtils.hpp" -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -using namespace sycl; - -struct CudaContextsTest : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_cuda); - - pi_platform platform_; - pi_device device_; - - void SetUp() override { - // skip the tests if the CUDA backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - } - - void TearDown() override {} - - CudaContextsTest() = default; - - ~CudaContextsTest() = default; -}; - -TEST_F(CudaContextsTest, ContextLifetime) { - // start with no active context - pi::clearCudaContext(); - - // create a context - pi_context context; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context)), - PI_SUCCESS); - ASSERT_NE(context, nullptr); - - // create a queue from the context, this should use the ScopedContext - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - - // ensure the queue has the correct context - ASSERT_EQ(context, queue->get_context()); - - // check that the context is now the active CUDA context - CUcontext cudaCtxt = nullptr; - cuCtxGetCurrent(&cudaCtxt); - ASSERT_EQ(cudaCtxt, context->get()); - - plugin->call(queue); - plugin->call(context); - - // check that the context was cleaned up properly by the destructor - cuCtxGetCurrent(&cudaCtxt); - ASSERT_EQ(cudaCtxt, nullptr); -} - -TEST_F(CudaContextsTest, ContextLifetimeExisting) { - // start by setting up a CUDA context on the thread - CUcontext original; - cuCtxCreate(&original, CU_CTX_MAP_HOST, device_->get()); - - // ensure the CUDA context is active - CUcontext current = nullptr; - cuCtxGetCurrent(¤t); - ASSERT_EQ(original, current); - - // create a PI context - pi_context context; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context)), - PI_SUCCESS); - ASSERT_NE(context, nullptr); - - // create a queue from the context, this should use the ScopedContext - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - - // ensure the queue has the correct context - ASSERT_EQ(context, queue->get_context()); - - // check that the context is now the active CUDA context - cuCtxGetCurrent(¤t); - ASSERT_EQ(current, context->get()); - - plugin->call(queue); - plugin->call(context); - - // check that the context was cleaned up, the old context will be restored - // automatically by cuCtxDestroy in piContextRelease, as it was pushed on the - // stack bu cuCtxCreate - cuCtxGetCurrent(¤t); - ASSERT_EQ(current, original); - - // release original context - cuCtxDestroy(original); -} - -// In some cases (for host_task), the SYCL runtime may call PI API functions -// from threads of the thread pool, this can cause issues because with the CUDA -// plugin these functions will set an active CUDA context on these threads, but -// never clean it up, as it will only get cleaned up in the main thread. -// -// So the following test aims to reproduce the scenario where there is a -// dangling deleted context in a separate thread and seeing if the PI calls are -// still able to work correctly in that thread. -TEST_F(CudaContextsTest, ContextThread) { - // start with no active context - pi::clearCudaContext(); - - // create two PI contexts - pi_context context1; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context1)), - PI_SUCCESS); - ASSERT_NE(context1, nullptr); - - pi_context context2; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context2)), - PI_SUCCESS); - ASSERT_NE(context2, nullptr); - - // setup synchronization variables between the main thread and the testing - // thread - std::mutex m; - std::condition_variable cv; - bool released = false; - bool thread_done = false; - - // create a testing thread that will create a queue with the first context, - // release the queue, then wait for the main thread to release the first - // context, and then create and release another queue with the second context - // this time - auto test_thread = std::thread([&] { - CUcontext current = nullptr; - - // create a queue with the first context - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context1, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - - // ensure the queue has the correct context - ASSERT_EQ(context1, queue->get_context()); - - // check that the first context is now the active CUDA context - cuCtxGetCurrent(¤t); - ASSERT_EQ(current, context1->get()); - - plugin->call(queue); - - // mark the first set of processing as done and notify the main thread - std::unique_lock lock(m); - thread_done = true; - lock.unlock(); - cv.notify_one(); - - // wait for the main thread to release the first context - lock.lock(); - cv.wait(lock, [&] { return released; }); - - // check that the first context is still active, this is because deleting a - // context only cleans up the current thread - cuCtxGetCurrent(¤t); - ASSERT_EQ(current, context1->get()); - - // create a queue with the second context - ASSERT_EQ((plugin->call_nocheck( - context2, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - - // ensure the queue has the correct context - ASSERT_EQ(context2, queue->get_context()); - - // check that the second context is now the active CUDA context - cuCtxGetCurrent(¤t); - ASSERT_EQ(current, context2->get()); - - plugin->call(queue); - }); - - // wait for the thread to be done with the first queue to release the first - // context - std::unique_lock lock(m); - cv.wait(lock, [&] { return thread_done; }); - plugin->call(context1); - - // notify the other thread that the context was released - released = true; - lock.unlock(); - cv.notify_one(); - - // wait for the thread to finish - test_thread.join(); - - plugin->call(context2); - - // check that there is no context set on the main thread - CUcontext current = nullptr; - cuCtxGetCurrent(¤t); - ASSERT_EQ(current, nullptr); -} diff --git a/sycl/unittests/pi/cuda/test_device.cpp b/sycl/unittests/pi/cuda/test_device.cpp deleted file mode 100644 index 9ddb62ca1a512..0000000000000 --- a/sycl/unittests/pi/cuda/test_device.cpp +++ /dev/null @@ -1,111 +0,0 @@ -//==---- test_device.cpp --- PI unit tests ---------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -using namespace sycl; - -struct CudaDeviceTests : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_cuda); - - pi_platform platform_; - pi_device device_; - pi_context context_; - - void SetUp() override { - // skip the tests if the CUDA backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - EXPECT_NE(context_, nullptr); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(device_); - plugin->call(context_); - } - } - - CudaDeviceTests() = default; - ~CudaDeviceTests() = default; -}; - -TEST_F(CudaDeviceTests, PIDeviceGetInfoSimple) { - - size_t return_size = 0; - pi_device_type device_type; - ASSERT_EQ((plugin->call_nocheck( - device_, PI_DEVICE_INFO_TYPE, sizeof(pi_device_type), - &device_type, &return_size)), - PI_SUCCESS); - EXPECT_EQ(return_size, sizeof(pi_device_type)); - EXPECT_EQ( - device_type, - PI_DEVICE_TYPE_GPU); // backend pre-defined value, device must be a GPU - - pi_device parent_device = nullptr; - ASSERT_EQ((plugin->call_nocheck( - device_, PI_DEVICE_INFO_PARENT_DEVICE, sizeof(pi_device), - &parent_device, &return_size)), - PI_SUCCESS); - EXPECT_EQ(return_size, sizeof(pi_device)); - EXPECT_EQ(parent_device, - nullptr); // backend pre-set value, device cannot have a parent - - pi_platform platform = nullptr; - ASSERT_EQ((plugin->call_nocheck( - device_, PI_DEVICE_INFO_PLATFORM, sizeof(pi_platform), - &platform, &return_size)), - PI_SUCCESS); - EXPECT_EQ(return_size, sizeof(pi_platform)); - EXPECT_EQ(platform, platform_); // test fixture device was created from the - // test fixture platform - - cl_device_partition_property device_partition_property = -1; - ASSERT_EQ((plugin->call_nocheck( - device_, PI_DEVICE_INFO_PARTITION_TYPE, - sizeof(cl_device_partition_property), - &device_partition_property, &return_size)), - PI_SUCCESS); - EXPECT_EQ(device_partition_property, - 0); // PI CUDA backend will not support device partitioning, this - // function should just return 0. - EXPECT_EQ(return_size, sizeof(cl_device_partition_property)); -} diff --git a/sycl/unittests/pi/cuda/test_interop_get_native.cpp b/sycl/unittests/pi/cuda/test_interop_get_native.cpp deleted file mode 100644 index 903d44043cda6..0000000000000 --- a/sycl/unittests/pi/cuda/test_interop_get_native.cpp +++ /dev/null @@ -1,137 +0,0 @@ -//==------- test_interop_get_native.cpp - SYCL CUDA get_native tests -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlatforms.hpp" -#include - -#include - -using namespace sycl; - -struct CudaInteropGetNativeTests : public ::testing::TestWithParam { - -protected: - std::unique_ptr syclQueue_; - device syclDevice_; - - void SetUp() override { - syclDevice_ = GetParam().get_devices()[0]; - syclQueue_ = std::unique_ptr{new queue{syclDevice_}}; - } - - void TearDown() override { syclQueue_.reset(); } -}; - -TEST_P(CudaInteropGetNativeTests, getNativeDevice) { - CUdevice cudaDevice = get_native(syclDevice_); - char cudaDeviceName[2] = {0, 0}; - CUresult result = cuDeviceGetName(cudaDeviceName, 2, cudaDevice); - ASSERT_EQ(result, CUDA_SUCCESS); - ASSERT_NE(cudaDeviceName[0], 0); -} - -TEST_P(CudaInteropGetNativeTests, getNativeContext) { - CUcontext cudaContext = - get_native(syclQueue_->get_context()); - ASSERT_NE(cudaContext, nullptr); -} - -TEST_P(CudaInteropGetNativeTests, getNativeQueue) { - CUstream cudaStream = get_native(*syclQueue_); - ASSERT_NE(cudaStream, nullptr); - - CUcontext streamContext = nullptr; - CUresult result = cuStreamGetCtx(cudaStream, &streamContext); - ASSERT_EQ(result, CUDA_SUCCESS); - - CUcontext cudaContext = - get_native(syclQueue_->get_context()); - ASSERT_EQ(streamContext, cudaContext); -} - -TEST_P(CudaInteropGetNativeTests, interopTaskGetMem) { - buffer syclBuffer(range<1>{1}); - syclQueue_->submit([&](handler &cgh) { - auto syclAccessor = syclBuffer.get_access(cgh); - cgh.host_task([=](interop_handle ih) { - CUdeviceptr cudaPtr = - ih.get_native_mem(syclAccessor); - CUdeviceptr cudaPtrBase; - size_t cudaPtrSize = 0; - CUcontext cudaContext = - get_native(syclQueue_->get_context()); - ASSERT_EQ(CUDA_SUCCESS, cuCtxPushCurrent(cudaContext)); - ASSERT_EQ(CUDA_SUCCESS, - cuMemGetAddressRange(&cudaPtrBase, &cudaPtrSize, cudaPtr)); - ASSERT_EQ(CUDA_SUCCESS, cuCtxPopCurrent(nullptr)); - ASSERT_EQ(sizeof(int), cudaPtrSize); - }); - }); -} - -TEST_P(CudaInteropGetNativeTests, interopTaskGetQueue) { - CUstream cudaStream = get_native(*syclQueue_); - syclQueue_->submit([&](handler &cgh) { - cgh.host_task([=](interop_handle ih) { - CUstream cudaInteropStream = - ih.get_native_queue(); - ASSERT_EQ(cudaInteropStream, cudaStream); - }); - }); -} - -TEST_P(CudaInteropGetNativeTests, hostTaskGetNativeMem) { - buffer syclBuffer(range<1>{1}); - syclQueue_->submit([&](handler &cgh) { - auto syclAccessor = syclBuffer.get_access(cgh); - cgh.host_task([=](interop_handle ih) { - CUdeviceptr cudaPtr = - ih.get_native_mem(syclAccessor); - CUdeviceptr cudaPtrBase; - size_t cudaPtrSize = 0; - CUcontext cudaContext = - get_native(syclQueue_->get_context()); - ASSERT_EQ(CUDA_SUCCESS, cuCtxPushCurrent(cudaContext)); - ASSERT_EQ(CUDA_SUCCESS, - cuMemGetAddressRange(&cudaPtrBase, &cudaPtrSize, cudaPtr)); - ASSERT_EQ(CUDA_SUCCESS, cuCtxPopCurrent(nullptr)); - ASSERT_EQ(sizeof(int), cudaPtrSize); - }); - }); -} - -TEST_P(CudaInteropGetNativeTests, hostTaskGetNativeQueue) { - CUstream cudaStream = get_native(*syclQueue_); - syclQueue_->submit([&](handler &cgh) { - cgh.host_task([=](interop_handle ih) { - CUstream cudaInteropStream = - ih.get_native_queue(); - ASSERT_EQ(cudaInteropStream, cudaStream); - }); - }); -} - -TEST_P(CudaInteropGetNativeTests, hostTaskGetNativeContext) { - CUcontext cudaContext = - get_native(syclQueue_->get_context()); - syclQueue_->submit([&](handler &cgh) { - cgh.host_task([=](interop_handle ih) { - CUcontext cudaInteropContext = - ih.get_native_context(); - ASSERT_EQ(cudaInteropContext, cudaContext); - }); - }); -} - -INSTANTIATE_TEST_SUITE_P( - OnCudaPlatform, CudaInteropGetNativeTests, - ::testing::ValuesIn(pi::getPlatformsWithName("CUDA BACKEND"))); diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp deleted file mode 100644 index 736e266b6566e..0000000000000 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ /dev/null @@ -1,466 +0,0 @@ -//==---- test_kernels.cpp --- PI unit tests --------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -// PI CUDA kernels carry an additional argument for the implicit global offset. -#define NUM_IMPLICIT_ARGS 1 - -using namespace sycl; - -struct CudaKernelsTest : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_cuda); - pi_platform platform_; - pi_device device_; - pi_context context_; - pi_queue queue_; - - void SetUp() override { - // skip the tests if the CUDA backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - ASSERT_NE(context_, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - context_, device_, 0, &queue_)), - PI_SUCCESS); - ASSERT_NE(queue_, nullptr); - ASSERT_EQ(queue_->get_context(), context_); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(device_); - plugin->call(queue_); - plugin->call(context_); - } - } - - CudaKernelsTest() = default; - - ~CudaKernelsTest() = default; -}; - -const char *ptxSource = "\n\ -.version 3.2\n\ -.target sm_20\n\ -.address_size 64\n\ -.visible .entry _Z8myKernelPi(\n\ - .param .u64 _Z8myKernelPi_param_0\n\ -)\n\ -{\n\ - .reg .s32 %r<5>;\n\ - .reg .s64 %rd<5>;\n\ - ld.param.u64 %rd1, [_Z8myKernelPi_param_0];\n\ - cvta.to.global.u64 %rd2, %rd1;\n\ - .loc 1 3 1\n\ - mov.u32 %r1, %ntid.x;\n\ - mov.u32 %r2, %ctaid.x;\n\ - mov.u32 %r3, %tid.x;\n\ - mad.lo.s32 %r4, %r1, %r2, %r3;\n\ - mul.wide.s32 %rd3, %r4, 4;\n\ - add.s64 %rd4, %rd2, %rd3;\n\ - .loc 1 4 1\n\ - st.global.u32 [%rd4], %r4;\n\ - .loc 1 5 2\n\ - ret;\n\ - ret;\ -\n\ -}\ -\n\ -"; - -const char *twoParams = "\n\ -.version 3.2\n\ -.target sm_20\n\ -.address_size 64\n\ -.visible .entry twoParamKernel(\n\ - .param .u64 twoParamKernel_param_0,\n\ - .param .u64 twoParamKernel_param_1\n\ -)\n\ -{\n\ - ret;\ - \n\ -}\n\ -"; - -const char *threeParamsTwoLocal = "\n\ -.version 3.2\n\ -.target sm_20\n\ -.address_size 64\n\ -.visible .entry twoParamKernelLocal(\n\ - .param .u64 twoParamKernel_param_0,\n\ - .param .u32 twoParamKernel_param_1,\n\ - .param .u32 twoParamKernel_param_2\n\ -)\n\ -{\n\ - ret;\ - \n\ -}\n\ -"; - -TEST_F(CudaKernelsTest, PICreateProgramAndKernel) { - - pi_program prog; - pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ( - (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, - nullptr, &binary_status, &prog)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - prog, 1, &device_, "", nullptr, nullptr)), - PI_SUCCESS); - - pi_kernel kern; - ASSERT_EQ((plugin->call_nocheck( - prog, "_Z8myKernelPi", &kern)), - PI_SUCCESS); - ASSERT_NE(kern, nullptr); -} - -TEST_F(CudaKernelsTest, PICreateProgramAndKernelWithMetadata) { - - std::vector reqdWorkGroupSizeMD; - reqdWorkGroupSizeMD.reserve(5); - // 64-bit representing bit size - reqdWorkGroupSizeMD.push_back(96); - reqdWorkGroupSizeMD.push_back(0); - // reqd_work_group_size x - reqdWorkGroupSizeMD.push_back(8); - // reqd_work_group_size y - reqdWorkGroupSizeMD.push_back(16); - // reqd_work_group_size z - reqdWorkGroupSizeMD.push_back(32); - - const char *reqdWorkGroupSizeMDConstName = - "_Z8myKernelPi@reqd_work_group_size"; - std::vector reqdWorkGroupSizeMDName( - reqdWorkGroupSizeMDConstName, - reqdWorkGroupSizeMDConstName + strlen(reqdWorkGroupSizeMDConstName) + 1); - _pi_device_binary_property_struct reqdWorkGroupSizeMDProp = { - reqdWorkGroupSizeMDName.data(), reqdWorkGroupSizeMD.data(), - pi_property_type::PI_PROPERTY_TYPE_BYTE_ARRAY, - sizeof(std::uint64_t) + sizeof(std::uint32_t) * 3}; - pi_device_binary_property reqdWorkGroupSizeMDPropPointer = - &reqdWorkGroupSizeMDProp; - - pi_program prog; - pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ( - (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 1, - &reqdWorkGroupSizeMDPropPointer, &binary_status, &prog)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - prog, 1, &device_, "", nullptr, nullptr)), - PI_SUCCESS); - - pi_kernel kern; - ASSERT_EQ((plugin->call_nocheck( - prog, "_Z8myKernelPi", &kern)), - PI_SUCCESS); - ASSERT_NE(kern, nullptr); - - size_t compileWGSize[3] = {0}; - ASSERT_EQ((plugin->call_nocheck( - kern, device_, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, - sizeof(size_t) * 3, compileWGSize, nullptr)), - PI_SUCCESS); - for (int i = 0; i < 3; ++i) { - ASSERT_EQ(compileWGSize[i], reqdWorkGroupSizeMD[i + 2]); - } -} - -TEST_F(CudaKernelsTest, PIKernelArgumentSimple) { - - pi_program prog; - /// NOTE: `binary_status` currently unsused in the CUDA backend but in case we - /// use it at some point in the future, pass it anyway and check the result. - /// Same goes for all the other tests in this file. - pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ( - (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, - nullptr, &binary_status, &prog)), - PI_SUCCESS); - ASSERT_EQ(binary_status, PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - prog, 1, &device_, "", nullptr, nullptr)), - PI_SUCCESS); - - pi_kernel kern; - ASSERT_EQ((plugin->call_nocheck( - prog, "_Z8myKernelPi", &kern)), - PI_SUCCESS); - - int number = 10; - ASSERT_EQ((plugin->call_nocheck( - kern, 0, sizeof(int), &number)), - PI_SUCCESS); - const auto &kernArgs = kern->get_arg_indices(); - ASSERT_EQ(kernArgs.size(), (size_t)1 + NUM_IMPLICIT_ARGS); - int storedValue = *(static_cast(kernArgs[0])); - ASSERT_EQ(storedValue, number); -} - -TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) { - - pi_program prog; - pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ( - (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, - nullptr, &binary_status, &prog)), - PI_SUCCESS); - ASSERT_EQ(binary_status, PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - prog, 1, &device_, "", nullptr, nullptr)), - PI_SUCCESS); - - pi_kernel kern; - ASSERT_EQ((plugin->call_nocheck( - prog, "_Z8myKernelPi", &kern)), - PI_SUCCESS); - - int number = 10; - ASSERT_EQ((plugin->call_nocheck( - kern, 0, sizeof(int), &number)), - PI_SUCCESS); - const auto &kernArgs = kern->get_arg_indices(); - ASSERT_GT(kernArgs.size(), (size_t)0 + NUM_IMPLICIT_ARGS); - int storedValue = *(static_cast(kernArgs[0])); - ASSERT_EQ(storedValue, number); - - int otherNumber = 934; - ASSERT_EQ((plugin->call_nocheck( - kern, 0, sizeof(int), &otherNumber)), - PI_SUCCESS); - const auto &kernArgs2 = kern->get_arg_indices(); - ASSERT_EQ(kernArgs2.size(), (size_t)1 + NUM_IMPLICIT_ARGS); - storedValue = *(static_cast(kernArgs2[0])); - ASSERT_EQ(storedValue, otherNumber); -} - -TEST_F(CudaKernelsTest, PIKernelSetMemObj) { - - pi_program prog; - pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ( - (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, - nullptr, &binary_status, &prog)), - PI_SUCCESS); - ASSERT_EQ(binary_status, PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - prog, 1, &device_, "", nullptr, nullptr)), - PI_SUCCESS); - - pi_kernel kern; - ASSERT_EQ((plugin->call_nocheck( - prog, "_Z8myKernelPi", &kern)), - PI_SUCCESS); - - size_t memSize = 1024u; - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj, - nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - kern, 0, sizeof(pi_mem), &memObj)), - PI_SUCCESS); - const auto &kernArgs = kern->get_arg_indices(); - ASSERT_EQ(kernArgs.size(), (size_t)1 + NUM_IMPLICIT_ARGS); - pi_mem storedValue = *(static_cast(kernArgs[0])); - ASSERT_EQ(storedValue, memObj); -} - -TEST_F(CudaKernelsTest, PIkerneldispatch) { - - pi_program prog; - pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ( - (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, - nullptr, &binary_status, &prog)), - PI_SUCCESS); - ASSERT_EQ(binary_status, PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - prog, 1, &device_, "", nullptr, nullptr)), - PI_SUCCESS); - - pi_kernel kern; - ASSERT_EQ((plugin->call_nocheck( - prog, "_Z8myKernelPi", &kern)), - PI_SUCCESS); - - size_t memSize = 1024u; - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj, - nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - kern, 0, nullptr, &memObj)), - PI_SUCCESS); - - size_t workDim = 1; - size_t globalWorkOffset[] = {0}; - size_t globalWorkSize[] = {1}; - size_t localWorkSize[] = {1}; - ASSERT_EQ((plugin->call_nocheck( - queue_, kern, workDim, globalWorkOffset, globalWorkSize, - localWorkSize, 0, nullptr, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); -} - -TEST_F(CudaKernelsTest, PIkerneldispatchTwo) { - - pi_program prog; - pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ( - (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&twoParams, 0, - nullptr, &binary_status, &prog)), - PI_SUCCESS); - ASSERT_EQ(binary_status, PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - prog, 1, &device_, "", nullptr, nullptr)), - PI_SUCCESS); - - pi_kernel kern; - ASSERT_EQ((plugin->call_nocheck( - prog, "twoParamKernel", &kern)), - PI_SUCCESS); - - size_t memSize = 1024u; - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj, - nullptr)), - PI_SUCCESS); - - pi_mem memObj2; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj2, - nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - kern, 0, nullptr, &memObj)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - kern, 1, nullptr, &memObj2)), - PI_SUCCESS); - - size_t workDim = 1; - size_t globalWorkOffset[] = {0}; - size_t globalWorkSize[] = {1}; - size_t localWorkSize[] = {1}; - ASSERT_EQ((plugin->call_nocheck( - queue_, kern, workDim, globalWorkOffset, globalWorkSize, - localWorkSize, 0, nullptr, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck(memObj2)), - PI_SUCCESS); -} - -TEST_F(CudaKernelsTest, PIKernelArgumentSetTwiceOneLocal) { - - pi_program prog; - pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ((plugin->call_nocheck( - context_, 1, &device_, nullptr, - (const unsigned char **)&threeParamsTwoLocal, 0, nullptr, - &binary_status, &prog)), - PI_SUCCESS); - ASSERT_EQ(binary_status, PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - prog, 1, &device_, "", nullptr, nullptr)), - PI_SUCCESS); - - pi_kernel kern; - ASSERT_EQ((plugin->call_nocheck( - prog, "twoParamKernelLocal", &kern)), - PI_SUCCESS); - - int number = 10; - ASSERT_EQ((plugin->call_nocheck( - kern, 0, sizeof(int), &number)), - PI_SUCCESS); - const auto &kernArgs = kern->get_arg_indices(); - ASSERT_GT(kernArgs.size(), (size_t)0 + NUM_IMPLICIT_ARGS); - int storedValue = *(static_cast(kernArgs[0])); - ASSERT_EQ(storedValue, number); - - ASSERT_EQ((plugin->call_nocheck( - kern, 1, sizeof(int), nullptr)), - PI_SUCCESS); - const auto &kernArgs2 = kern->get_arg_indices(); - ASSERT_EQ(kernArgs2.size(), (size_t)2 + NUM_IMPLICIT_ARGS); - storedValue = *(static_cast(kernArgs2[1])); - ASSERT_EQ(storedValue, 0); - - ASSERT_EQ((plugin->call_nocheck( - kern, 2, sizeof(int), nullptr)), - PI_SUCCESS); - const auto &kernArgs3 = kern->get_arg_indices(); - ASSERT_EQ(kernArgs3.size(), (size_t)3 + NUM_IMPLICIT_ARGS); - storedValue = *(static_cast(kernArgs3[2])); - ASSERT_EQ(storedValue, static_cast(sizeof(int))); -} diff --git a/sycl/unittests/pi/cuda/test_mem_obj.cpp b/sycl/unittests/pi/cuda/test_mem_obj.cpp deleted file mode 100644 index b0693ff30830c..0000000000000 --- a/sycl/unittests/pi/cuda/test_mem_obj.cpp +++ /dev/null @@ -1,207 +0,0 @@ -//==---- test_mem_obj.cpp --- PI unit tests --------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "CudaUtils.hpp" -#include "TestGetPlugin.hpp" -#include -#include -#include -#include -#include - -using namespace sycl; - -struct CudaTestMemObj : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_cuda); - - pi_platform platform_; - pi_device device_; - pi_context context_; - - void SetUp() override { - // skip the tests if the CUDA backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi::clearCudaContext(); - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - EXPECT_NE(context_, nullptr); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(device_); - plugin->call(context_); - } - } - - CudaTestMemObj() = default; - - ~CudaTestMemObj() = default; -}; - -TEST_F(CudaTestMemObj, piMemBufferCreateSimple) { - const size_t memSize = 1024u; - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj, - nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); -} - -TEST_F(CudaTestMemObj, piMemBufferAllocHost) { - const size_t memSize = 1024u; - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC, - memSize, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); -} - -TEST_F(CudaTestMemObj, piMemBufferCreateNoActiveContext) { - const size_t memSize = 1024u; - // Context has been destroyed - - CUcontext current = nullptr; - - // pop CUDA contexts until there is not a cuda context bound to the thread - do { - CUcontext oldContext = nullptr; - auto cuErr = cuCtxPopCurrent(&oldContext); - EXPECT_EQ(cuErr, CUDA_SUCCESS); - - // There should not be any active CUDA context - cuErr = cuCtxGetCurrent(¤t); - ASSERT_EQ(cuErr, CUDA_SUCCESS); - } while (current != nullptr); - - // The context object is passed, even if its not active it should be used - // to allocate the memory object - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj, - nullptr)), - PI_SUCCESS); - ASSERT_NE(memObj, nullptr); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); -} - -TEST_F(CudaTestMemObj, piMemBufferPinnedMappedRead) { - const size_t memSize = sizeof(int); - const int value = 20; - - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context_, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - ASSERT_EQ(queue->get_context(), context_); - - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC, - memSize, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - ASSERT_EQ( - (plugin->call_nocheck( - queue, memObj, true, 0, sizeof(int), &value, 0, nullptr, nullptr)), - PI_SUCCESS); - - int *host_ptr = nullptr; - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, true, PI_MAP_READ, 0, sizeof(int), 0, nullptr, - nullptr, (void **)&host_ptr)), - PI_SUCCESS); - - ASSERT_EQ(*host_ptr, value); - - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, host_ptr, 0, nullptr, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); - plugin->call(queue); -} - -TEST_F(CudaTestMemObj, piMemBufferPinnedMappedWrite) { - const size_t memSize = sizeof(int); - const int value = 30; - - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context_, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - ASSERT_EQ(queue->get_context(), context_); - - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC, - memSize, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - int *host_ptr = nullptr; - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, true, PI_MAP_WRITE, 0, sizeof(int), 0, nullptr, - nullptr, (void **)&host_ptr)), - PI_SUCCESS); - - *host_ptr = value; - - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, host_ptr, 0, nullptr, nullptr)), - PI_SUCCESS); - - int read_value = 0; - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, true, 0, sizeof(int), &read_value, 0, nullptr, - nullptr)), - PI_SUCCESS); - - ASSERT_EQ(read_value, value); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); - plugin->call(queue); -} diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp deleted file mode 100644 index f9ce627d126ad..0000000000000 --- a/sycl/unittests/pi/cuda/test_primary_context.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//==---------- pi_primary_context.cpp - PI unit tests ----------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlatforms.hpp" -#include -#include - -#include - -using namespace sycl; - -struct CudaPrimaryContextTests : public ::testing::TestWithParam { - -protected: - device deviceA_; - device deviceB_; - - void SetUp() override { - std::vector CudaDevices = GetParam().get_devices(); - - deviceA_ = CudaDevices[0]; - deviceB_ = CudaDevices.size() > 1 ? CudaDevices[1] : deviceA_; - } - - void TearDown() override {} -}; - -TEST_P(CudaPrimaryContextTests, piSingleContext) { - std::cout << "create single context" << std::endl; - context Context( - deviceA_, async_handler{}, - {sycl::ext::oneapi::cuda::property::context::use_primary_context{}}); - - CUdevice CudaDevice = get_native(deviceA_); - CUcontext CudaContext = get_native(Context); - - CUcontext PrimaryCudaContext; - cuDevicePrimaryCtxRetain(&PrimaryCudaContext, CudaDevice); - - ASSERT_EQ(CudaContext, PrimaryCudaContext); - - cuDevicePrimaryCtxRelease(CudaDevice); -} - -TEST_P(CudaPrimaryContextTests, piMultiContextSingleDevice) { - std::cout << "create multiple contexts for one device" << std::endl; - context ContextA( - deviceA_, async_handler{}, - {sycl::ext::oneapi::cuda::property::context::use_primary_context{}}); - context ContextB( - deviceA_, async_handler{}, - {sycl::ext::oneapi::cuda::property::context::use_primary_context{}}); - - CUcontext CudaContextA = get_native(ContextA); - CUcontext CudaContextB = get_native(ContextB); - - ASSERT_EQ(CudaContextA, CudaContextB); -} - -TEST_P(CudaPrimaryContextTests, piMultiContextMultiDevice) { - if (deviceA_ == deviceB_) - return; - - CUdevice CudaDeviceA = get_native(deviceA_); - CUdevice CudaDeviceB = get_native(deviceB_); - - ASSERT_NE(CudaDeviceA, CudaDeviceB); - - std::cout << "create multiple contexts for multiple devices" << std::endl; - context ContextA( - deviceA_, async_handler{}, - {sycl::ext::oneapi::cuda::property::context::use_primary_context{}}); - context ContextB( - deviceB_, async_handler{}, - {sycl::ext::oneapi::cuda::property::context::use_primary_context{}}); - - CUcontext CudaContextA = get_native(ContextA); - CUcontext CudaContextB = get_native(ContextB); - - ASSERT_NE(CudaContextA, CudaContextB); -} - -INSTANTIATE_TEST_SUITE_P( - OnCudaPlatform, CudaPrimaryContextTests, - ::testing::ValuesIn(pi::getPlatformsWithName("CUDA BACKEND"))); diff --git a/sycl/unittests/pi/cuda/test_sampler_properties.cpp b/sycl/unittests/pi/cuda/test_sampler_properties.cpp deleted file mode 100644 index 793703d2bd1ca..0000000000000 --- a/sycl/unittests/pi/cuda/test_sampler_properties.cpp +++ /dev/null @@ -1,135 +0,0 @@ -//==---- PlatformTest.cpp --- PI unit tests --------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include "TestGetPlugin.hpp" -#include -#include -#include - -#include - -namespace { - -using namespace sycl; - -class SamplerPropertiesTest - : public ::testing::TestWithParam> { -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_cuda); - - pi_platform platform_; - pi_device device_; - pi_context context_; - pi_sampler sampler_; - - pi_bool normalizedCoords_; - pi_sampler_filter_mode filterMode_; - pi_sampler_addressing_mode addressMode_; - - SamplerPropertiesTest() = default; - - ~SamplerPropertiesTest() override = default; - - void SetUp() override { - // skip the tests if the CUDA backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - std::tie(normalizedCoords_, filterMode_, addressMode_) = GetParam(); - - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_cuda), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - EXPECT_NE(context_, nullptr); - - pi_sampler_properties sampler_properties[] = { - PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS, - static_cast(normalizedCoords_), - PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, - static_cast(addressMode_), - PI_SAMPLER_PROPERTIES_FILTER_MODE, - static_cast(filterMode_), - 0}; - - ASSERT_EQ((plugin->call_nocheck( - context_, sampler_properties, &sampler_)), - PI_SUCCESS); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(sampler_); - plugin->call(device_); - plugin->call(context_); - } - } -}; - -TEST_P(SamplerPropertiesTest, piCheckNormalizedCoords) { - pi_bool actualNormalizedCoords = !normalizedCoords_; - - plugin->call( - sampler_, PI_SAMPLER_INFO_NORMALIZED_COORDS, sizeof(pi_bool), - &actualNormalizedCoords, nullptr); - - ASSERT_EQ(actualNormalizedCoords, normalizedCoords_); -} - -TEST_P(SamplerPropertiesTest, piCheckFilterMode) { - pi_sampler_filter_mode actualFilterMode; - - plugin->call( - sampler_, PI_SAMPLER_INFO_FILTER_MODE, sizeof(pi_sampler_filter_mode), - &actualFilterMode, nullptr); - - ASSERT_EQ(actualFilterMode, filterMode_); -} - -TEST_P(SamplerPropertiesTest, piCheckAddressingMode) { - pi_sampler_addressing_mode actualAddressMode; - - plugin->call( - sampler_, PI_SAMPLER_INFO_ADDRESSING_MODE, - sizeof(pi_sampler_addressing_mode), &actualAddressMode, nullptr); - - ASSERT_EQ(actualAddressMode, addressMode_); -} - -INSTANTIATE_TEST_SUITE_P( - SamplerPropertiesTestImpl, SamplerPropertiesTest, - ::testing::Combine( - ::testing::Values(PI_TRUE, PI_FALSE), - ::testing::Values(PI_SAMPLER_FILTER_MODE_LINEAR, - PI_SAMPLER_FILTER_MODE_NEAREST), - ::testing::Values(PI_SAMPLER_ADDRESSING_MODE_CLAMP, - PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE, - PI_SAMPLER_ADDRESSING_MODE_NONE, - PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT, - PI_SAMPLER_ADDRESSING_MODE_REPEAT))); -} // namespace diff --git a/sycl/unittests/pi/hip/CMakeLists.txt b/sycl/unittests/pi/hip/CMakeLists.txt deleted file mode 100644 index eee75b0447551..0000000000000 --- a/sycl/unittests/pi/hip/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -add_sycl_unittest(PiHipTests OBJECT - test_base_objects.cpp - test_commands.cpp - test_contexts.cpp - test_device.cpp - test_interop_get_native.cpp - test_kernels.cpp - test_mem_obj.cpp - test_primary_context.cpp - test_sampler_properties.cpp -) - -add_dependencies(PiHipTests sycl) - -target_compile_definitions(PiHipTests - PRIVATE - GTEST_HAS_COMBINE=1) - -target_include_directories(PiHipTests - PRIVATE - "../" - "${sycl_inc_dir}/sycl/detail/" - "${sycl_inc_dir}" - "${sycl_plugin_dir}/hip/" - "${sycl_plugin_dir}/unified_runtime/" -) - -if("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "AMD") - # Set HIP define to select AMD platform - target_compile_definitions(PiHipTests PRIVATE __HIP_PLATFORM_AMD__) -elseif("${SYCL_BUILD_PI_HIP_PLATFORM}" STREQUAL "NVIDIA") - # Set HIP define to select NVIDIA platform - target_compile_definitions(PiHipTests PRIVATE __HIP_PLATFORM_NVIDIA__) -else() - message(FATAL_ERROR "Unspecified PI HIP platform, please set SYCL_BUILD_PI_HIP_PLATFORM to 'AMD' or 'NVIDIA'") -endif() - -target_link_libraries(PiHipTests - PRIVATE - rocmdrv - UnifiedRuntime-Headers -) diff --git a/sycl/unittests/pi/hip/HipUtils.hpp b/sycl/unittests/pi/hip/HipUtils.hpp deleted file mode 100644 index c62bf7dfefb08..0000000000000 --- a/sycl/unittests/pi/hip/HipUtils.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#pragma once - -#include - -namespace pi { - -// utility function to clear the HIP context stack -inline void clearHipContext() { - hipCtx_t ctxt = nullptr; - do { - hipCtxSetCurrent(nullptr); - hipCtxGetCurrent(&ctxt); - } while (ctxt != nullptr); -} - -} // namespace pi diff --git a/sycl/unittests/pi/hip/test_base_objects.cpp b/sycl/unittests/pi/hip/test_base_objects.cpp deleted file mode 100644 index 86458aa62c0b1..0000000000000 --- a/sycl/unittests/pi/hip/test_base_objects.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//==---- test_base_objects.cpp --- PI unit tests ---------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -#include - -// https://sep5.readthedocs.io/en/latest/ROCm_API_References/ -// HIP_API/Context-Management.html#_CPPv419hipCtxGetApiVersion8hipCtx_tPi -const int HIP_DRIVER_API_VERSION = 4; - -using namespace sycl; - -class HipBaseObjectsTest : public ::testing::Test { -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_hip); - - void SetUp() override { - // skip the tests if the HIP backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - } - - HipBaseObjectsTest() = default; - - ~HipBaseObjectsTest() = default; -}; - -TEST_F(HipBaseObjectsTest, piContextCreate) { - pi_uint32 numPlatforms = 0; - pi_platform platform = nullptr; - pi_device device; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_GE(numPlatforms, 1u); - ASSERT_NE(platform, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), - PI_SUCCESS) - << "piDevicesGet failed.\n"; - - pi_context ctxt = nullptr; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device, nullptr, nullptr, &ctxt)), - PI_SUCCESS) - << "piContextCreate failed.\n"; - - EXPECT_NE(ctxt, nullptr); - EXPECT_EQ(ctxt->get_device(), device); - - // Retrieve the hipCtxt to check information is correct - hipCtx_t hipContext = ctxt->get(); - int version = 0; - auto hipErr = hipCtxGetApiVersion(hipContext, &version); - EXPECT_EQ(hipErr, PI_SUCCESS); - EXPECT_EQ(version, HIP_DRIVER_API_VERSION); - - ASSERT_EQ((plugin->call_nocheck(ctxt)), - PI_SUCCESS); -} - -TEST_F(HipBaseObjectsTest, piContextCreateChildThread) { - pi_uint32 numPlatforms = 0; - pi_platform platform; - pi_device device; - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), - PI_SUCCESS); - - pi_context ctxt; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device, nullptr, nullptr, &ctxt)), - PI_SUCCESS); - EXPECT_NE(ctxt, nullptr); - - // Retrieve the cuCtxt to check information is correct - auto checkValue = [=]() { - hipCtx_t hipContext = ctxt->get(); - int version = 0; - auto hipErr = hipCtxGetApiVersion(hipContext, &version); - EXPECT_EQ(hipErr, PI_SUCCESS); - EXPECT_EQ(version, HIP_DRIVER_API_VERSION); - - // The current context is different from the current thread - hipCtx_t current; - hipErr = hipCtxGetCurrent(¤t); - EXPECT_EQ(hipErr, PI_SUCCESS); - EXPECT_NE(hipContext, current); - - // Set the context from PI API as the current one - hipErr = hipCtxPushCurrent(hipContext); - EXPECT_EQ(hipErr, PI_SUCCESS); - - hipErr = hipCtxGetCurrent(¤t); - EXPECT_EQ(hipErr, PI_SUCCESS); - EXPECT_EQ(hipContext, current); - }; - auto callContextFromOtherThread = std::thread(checkValue); - - callContextFromOtherThread.join(); - - ASSERT_EQ((plugin->call_nocheck(ctxt)), - PI_SUCCESS); -} diff --git a/sycl/unittests/pi/hip/test_commands.cpp b/sycl/unittests/pi/hip/test_commands.cpp deleted file mode 100644 index d453e9b9da1ea..0000000000000 --- a/sycl/unittests/pi/hip/test_commands.cpp +++ /dev/null @@ -1,145 +0,0 @@ -//==---- test_commands.cpp --- PI unit tests -------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "HipUtils.hpp" -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -using namespace sycl; - -struct HipCommandsTest : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_hip); - - pi_platform platform_; - pi_device device_; - pi_context context_; - pi_queue queue_; - - void SetUp() override { - // skip the tests if the HIP backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi::clearHipContext(); - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - ASSERT_NE(context_, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - context_, device_, 0, &queue_)), - PI_SUCCESS); - ASSERT_NE(queue_, nullptr); - auto tmpCtxt = queue_->get_context(); - ASSERT_EQ(tmpCtxt, context_); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(queue_); - plugin->call(context_); - } - } - - HipCommandsTest() = default; - - ~HipCommandsTest() = default; -}; - -TEST_F(HipCommandsTest, PIEnqueueReadBufferBlocking) { - constexpr const size_t memSize = 10u; - constexpr const size_t bytes = memSize * sizeof(int); - const int data[memSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - int output[memSize] = {}; - - pi_mem memObj; - ASSERT_EQ( - (plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - queue_, memObj, true, 0, bytes, data, 0, nullptr, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - queue_, memObj, true, 0, bytes, output, 0, nullptr, nullptr)), - PI_SUCCESS); - - bool isSame = - std::equal(std::begin(output), std::end(output), std::begin(data)); - EXPECT_TRUE(isSame); - if (!isSame) { - std::for_each(std::begin(output), std::end(output), - [](int &elem) { std::cout << elem << ","; }); - std::cout << std::endl; - } -} - -TEST_F(HipCommandsTest, PIEnqueueReadBufferNonBlocking) { - constexpr const size_t memSize = 10u; - constexpr const size_t bytes = memSize * sizeof(int); - const int data[memSize] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - int output[memSize] = {}; - - pi_mem memObj; - ASSERT_EQ( - (plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - pi_event cpIn, cpOut; - ASSERT_EQ((plugin->call_nocheck( - queue_, memObj, false, 0, bytes, data, 0, nullptr, &cpIn)), - PI_SUCCESS); - ASSERT_NE(cpIn, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - queue_, memObj, false, 0, bytes, output, 0, nullptr, &cpOut)), - PI_SUCCESS); - ASSERT_NE(cpOut, nullptr); - - ASSERT_EQ((plugin->call_nocheck(1, &cpOut)), - PI_SUCCESS); - - bool isSame = - std::equal(std::begin(output), std::end(output), std::begin(data)); - EXPECT_TRUE(isSame); - if (!isSame) { - std::for_each(std::begin(output), std::end(output), - [](int &elem) { std::cout << elem << ","; }); - std::cout << std::endl; - } -} diff --git a/sycl/unittests/pi/hip/test_contexts.cpp b/sycl/unittests/pi/hip/test_contexts.cpp deleted file mode 100644 index fa20e101d00e2..0000000000000 --- a/sycl/unittests/pi/hip/test_contexts.cpp +++ /dev/null @@ -1,250 +0,0 @@ -//==---- test_contexts.cpp --- PI unit tests -------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include -#include -#include - -#include - -#include "HipUtils.hpp" -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -using namespace sycl; - -struct HipContextsTest : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_hip); - - pi_platform platform_; - pi_device device_; - - void SetUp() override { - // skip the tests if the HIP backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - } - - void TearDown() override {} - - HipContextsTest() = default; - - ~HipContextsTest() = default; -}; - -TEST_F(HipContextsTest, ContextLifetime) { - // start with no active context - pi::clearHipContext(); - - // create a context - pi_context context; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context)), - PI_SUCCESS); - ASSERT_NE(context, nullptr); - - // create a queue from the context, this should use the ScopedContext - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - - // ensure the queue has the correct context - ASSERT_EQ(context, queue->get_context()); - - // check that the context is now the active HIP context - hipCtx_t hipCtxt = nullptr; - hipCtxGetCurrent(&hipCtxt); - ASSERT_EQ(hipCtxt, context->get()); - - plugin->call(queue); - plugin->call(context); - - // check that the context was cleaned up properly by the destructor - hipCtxGetCurrent(&hipCtxt); - ASSERT_EQ(hipCtxt, nullptr); -} - -TEST_F(HipContextsTest, ContextLifetimeExisting) { - // start by setting up a HIP context on the thread - hipCtx_t original; - hipCtxCreate(&original, hipDeviceMapHost, device_->get()); - - // ensure the HIP context is active - hipCtx_t current = nullptr; - hipCtxGetCurrent(¤t); - ASSERT_EQ(original, current); - - // create a PI context - pi_context context; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context)), - PI_SUCCESS); - ASSERT_NE(context, nullptr); - - // create a queue from the context, this should use the ScopedContext - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - - // ensure the queue has the correct context - ASSERT_EQ(context, queue->get_context()); - - // check that the context is now the active HIP context - hipCtxGetCurrent(¤t); - ASSERT_EQ(current, context->get()); - - plugin->call(queue); - plugin->call(context); - - // check that the context was cleaned up, the old context will be restored - // automatically by hipCtxDestroy in piContextRelease, as it was pushed on the - // stack bu hipCtxCreate - hipCtxGetCurrent(¤t); - ASSERT_EQ(current, original); - - // release original context - hipCtxDestroy(original); -} - -// In some cases (for host_task), the SYCL runtime may call PI API functions -// from threads of the thread pool, this can cause issues because with the HIP -// plugin these functions will set an active HIP context on these threads, but -// never clean it up, as it will only get cleaned up in the main thread. -// -// So the following test aims to reproduce the scenario where there is a -// dangling deleted context in a separate thread and seeing if the PI calls are -// still able to work correctly in that thread. -TEST_F(HipContextsTest, ContextThread) { - // start with no active context - pi::clearHipContext(); - - // create two PI contexts - pi_context context1; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context1)), - PI_SUCCESS); - ASSERT_NE(context1, nullptr); - - pi_context context2; - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context2)), - PI_SUCCESS); - ASSERT_NE(context2, nullptr); - - // setup synchronization variables between the main thread and the testing - // thread - std::mutex m; - std::condition_variable cv; - bool released = false; - bool thread_done = false; - - // create a testing thread that will create a queue with the first context, - // release the queue, then wait for the main thread to release the first - // context, and then create and release another queue with the second context - // this time - auto test_thread = std::thread([&] { - hipCtx_t current = nullptr; - - // create a queue with the first context - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context1, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - - // ensure the queue has the correct context - ASSERT_EQ(context1, queue->get_context()); - - // check that the first context is now the active HIP context - hipCtxGetCurrent(¤t); - ASSERT_EQ(current, context1->get()); - - plugin->call(queue); - - // mark the first set of processing as done and notify the main thread - std::unique_lock lock(m); - thread_done = true; - lock.unlock(); - cv.notify_one(); - - // wait for the main thread to release the first context - lock.lock(); - cv.wait(lock, [&] { return released; }); - - // check that the first context is still active, this is because deleting a - // context only cleans up the current thread - hipCtxGetCurrent(¤t); - ASSERT_EQ(current, context1->get()); - - // create a queue with the second context - ASSERT_EQ((plugin->call_nocheck( - context2, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - - // ensure the queue has the correct context - ASSERT_EQ(context2, queue->get_context()); - - // check that the second context is now the active HIP context - hipCtxGetCurrent(¤t); - ASSERT_EQ(current, context2->get()); - - plugin->call(queue); - }); - - // wait for the thread to be done with the first queue to release the first - // context - std::unique_lock lock(m); - cv.wait(lock, [&] { return thread_done; }); - plugin->call(context1); - - // notify the other thread that the context was released - released = true; - lock.unlock(); - cv.notify_one(); - - // wait for the thread to finish - test_thread.join(); - - plugin->call(context2); - - // check that there is no context set on the main thread - hipCtx_t current = nullptr; - hipCtxGetCurrent(¤t); - ASSERT_EQ(current, nullptr); -} diff --git a/sycl/unittests/pi/hip/test_device.cpp b/sycl/unittests/pi/hip/test_device.cpp deleted file mode 100644 index a2d61c6ee87fc..0000000000000 --- a/sycl/unittests/pi/hip/test_device.cpp +++ /dev/null @@ -1,111 +0,0 @@ -//==---- test_device.cpp --- PI unit tests ---------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -using namespace sycl; - -struct HipDeviceTests : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_hip); - - pi_platform platform_; - pi_device device_; - pi_context context_; - - void SetUp() override { - // skip the tests if the HIP backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - EXPECT_NE(context_, nullptr); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(device_); - plugin->call(context_); - } - } - - HipDeviceTests() = default; - ~HipDeviceTests() = default; -}; - -TEST_F(HipDeviceTests, PIDeviceGetInfoSimple) { - - size_t return_size = 0; - pi_device_type device_type; - ASSERT_EQ((plugin->call_nocheck( - device_, PI_DEVICE_INFO_TYPE, sizeof(pi_device_type), - &device_type, &return_size)), - PI_SUCCESS); - EXPECT_EQ(return_size, sizeof(pi_device_type)); - EXPECT_EQ( - device_type, - PI_DEVICE_TYPE_GPU); // backend pre-defined value, device must be a GPU - - pi_device parent_device = nullptr; - ASSERT_EQ((plugin->call_nocheck( - device_, PI_DEVICE_INFO_PARENT_DEVICE, sizeof(pi_device), - &parent_device, &return_size)), - PI_SUCCESS); - EXPECT_EQ(return_size, sizeof(pi_device)); - EXPECT_EQ(parent_device, - nullptr); // backend pre-set value, device cannot have a parent - - pi_platform platform = nullptr; - ASSERT_EQ((plugin->call_nocheck( - device_, PI_DEVICE_INFO_PLATFORM, sizeof(pi_platform), - &platform, &return_size)), - PI_SUCCESS); - EXPECT_EQ(return_size, sizeof(pi_platform)); - EXPECT_EQ(platform, platform_); // test fixture device was created from the - // test fixture platform - - cl_device_partition_property device_partition_property = -1; - ASSERT_EQ((plugin->call_nocheck( - device_, PI_DEVICE_INFO_PARTITION_TYPE, - sizeof(cl_device_partition_property), - &device_partition_property, &return_size)), - PI_SUCCESS); - EXPECT_EQ(device_partition_property, - 0); // PI HIP backend will not support device partitioning, this - // function should just return 0. - EXPECT_EQ(return_size, sizeof(cl_device_partition_property)); -} diff --git a/sycl/unittests/pi/hip/test_interop_get_native.cpp b/sycl/unittests/pi/hip/test_interop_get_native.cpp deleted file mode 100644 index 39e357430036b..0000000000000 --- a/sycl/unittests/pi/hip/test_interop_get_native.cpp +++ /dev/null @@ -1,127 +0,0 @@ -//==------- test_interop_get_native.cpp - SYCL HIP get_native tests --------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include - -#include - -#include "TestGetPlatforms.hpp" - -#include - -using namespace sycl; - -struct HipInteropGetNativeTests : public ::testing::TestWithParam { - -protected: - std::unique_ptr syclQueue_; - device syclDevice_; - - void SetUp() override { - syclDevice_ = GetParam().get_devices()[0]; - syclQueue_ = std::unique_ptr{new queue{syclDevice_}}; - } - - void TearDown() override { syclQueue_.reset(); } -}; - -TEST_P(HipInteropGetNativeTests, getNativeDevice) { - hipDevice_t hipDevice = get_native(syclDevice_); - char hipDeviceName[2] = {0, 0}; - hipError_t result = hipDeviceGetName(hipDeviceName, 2, hipDevice); - ASSERT_EQ(result, PI_SUCCESS); - ASSERT_NE(hipDeviceName[0], 0); -} - -TEST_P(HipInteropGetNativeTests, getNativeContext) { - hipCtx_t hipContext = - get_native(syclQueue_->get_context()); - ASSERT_NE(hipContext, nullptr); -} - -TEST_P(HipInteropGetNativeTests, interopTaskGetMem) { - buffer syclBuffer(range<1>{1}); - syclQueue_->submit([&](handler &cgh) { - auto syclAccessor = syclBuffer.get_access(cgh); - cgh.host_task([=](interop_handle ih) { - hipDeviceptr_t hipPtr = - ih.get_native_mem(syclAccessor); - hipDeviceptr_t hipPtrBase; - size_t hipPtrSize = 0; - hipCtx_t hipContext = - get_native(syclQueue_->get_context()); - ASSERT_EQ(PI_SUCCESS, hipCtxPushCurrent(hipContext)); - ASSERT_EQ(PI_SUCCESS, - hipMemGetAddressRange(&hipPtrBase, &hipPtrSize, hipPtr)); - ASSERT_EQ(PI_SUCCESS, hipCtxPopCurrent(nullptr)); - ASSERT_EQ(sizeof(int), hipPtrSize); - }); - }); -} - -TEST_P(HipInteropGetNativeTests, interopTaskGetQueue) { - hipStream_t hipStream = get_native(*syclQueue_); - syclQueue_->submit([&](handler &cgh) { - cgh.host_task([=](interop_handle ih) { - hipStream_t hipInteropStream = - ih.get_native_queue(); - ASSERT_EQ(hipInteropStream, hipStream); - }); - }); -} - -TEST_P(HipInteropGetNativeTests, hostTaskGetNativeMem) { - buffer syclBuffer(range<1>{1}); - syclQueue_->submit([&](handler &cgh) { - auto syclAccessor = syclBuffer.get_access(cgh); - cgh.host_task([=](interop_handle ih) { - hipDeviceptr_t hipPtr = - ih.get_native_mem(syclAccessor); - hipDeviceptr_t hipPtrBase; - size_t hipPtrSize = 0; - hipCtx_t hipContext = - get_native(syclQueue_->get_context()); - ASSERT_EQ(PI_SUCCESS, hipCtxPushCurrent(hipContext)); - ASSERT_EQ(PI_SUCCESS, - hipMemGetAddressRange(&hipPtrBase, &hipPtrSize, hipPtr)); - ASSERT_EQ(PI_SUCCESS, hipCtxPopCurrent(nullptr)); - ASSERT_EQ(sizeof(int), hipPtrSize); - }); - }); -} - -TEST_P(HipInteropGetNativeTests, hostTaskGetNativeQueue) { - hipStream_t hipStream = get_native(*syclQueue_); - syclQueue_->submit([&](handler &cgh) { - cgh.host_task([=](interop_handle ih) { - hipStream_t hipInteropStream = - ih.get_native_queue(); - ASSERT_EQ(hipInteropStream, hipStream); - }); - }); -} - -TEST_P(HipInteropGetNativeTests, hostTaskGetNativeContext) { - hipCtx_t hipContext = - get_native(syclQueue_->get_context()); - syclQueue_->submit([&](handler &cgh) { - cgh.host_task([=](interop_handle ih) { - hipCtx_t hipInteropContext = - ih.get_native_context(); - ASSERT_EQ(hipInteropContext, hipContext); - }); - }); -} - -INSTANTIATE_TEST_SUITE_P( - OnHipPlatform, HipInteropGetNativeTests, - ::testing::ValuesIn(pi::getPlatformsWithName("HIP BACKEND"))); diff --git a/sycl/unittests/pi/hip/test_kernels.cpp b/sycl/unittests/pi/hip/test_kernels.cpp deleted file mode 100644 index 51463a5e23c29..0000000000000 --- a/sycl/unittests/pi/hip/test_kernels.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//==---- test_kernels.cpp --- PI unit tests --------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -// PI HIP kernels carry an additional argument for the implicit global offset. -#define NUM_IMPLICIT_ARGS 1 - -using namespace sycl; - -struct HipKernelsTest : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_hip); - pi_platform platform_; - pi_device device_; - pi_context context_; - pi_queue queue_; - - void SetUp() override { - // skip the tests if the HIP backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - ASSERT_NE(context_, nullptr); - - ASSERT_EQ((plugin->call_nocheck( - context_, device_, 0, &queue_)), - PI_SUCCESS); - ASSERT_NE(queue_, nullptr); - ASSERT_EQ(queue_->get_context(), context_); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(device_); - plugin->call(queue_); - plugin->call(context_); - } - } - - HipKernelsTest() = default; - - ~HipKernelsTest() = default; -}; diff --git a/sycl/unittests/pi/hip/test_mem_obj.cpp b/sycl/unittests/pi/hip/test_mem_obj.cpp deleted file mode 100644 index 382a510bf97d9..0000000000000 --- a/sycl/unittests/pi/hip/test_mem_obj.cpp +++ /dev/null @@ -1,206 +0,0 @@ -//==---- test_mem_obj.cpp --- PI unit tests --------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "HipUtils.hpp" -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -using namespace sycl; - -struct HipTestMemObj : public ::testing::Test { - -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_hip); - - pi_platform platform_; - pi_device device_; - pi_context context_; - - void SetUp() override { - // skip the tests if the HIP backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - pi::clearHipContext(); - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - EXPECT_NE(context_, nullptr); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(device_); - plugin->call(context_); - } - } - - HipTestMemObj() = default; - - ~HipTestMemObj() = default; -}; - -TEST_F(HipTestMemObj, piMemBufferCreateSimple) { - const size_t memSize = 1024u; - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj, - nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); -} - -TEST_F(HipTestMemObj, piMemBufferAllocHost) { - const size_t memSize = 1024u; - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC, - memSize, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); -} - -TEST_F(HipTestMemObj, piMemBufferCreateNoActiveContext) { - const size_t memSize = 1024u; - // Context has been destroyed - - hipCtx_t current = nullptr; - - // pop HIP contexts until there is not a HIP context bound to the thread - do { - hipCtx_t oldContext = nullptr; - auto hipErr = hipCtxPopCurrent(&oldContext); - EXPECT_EQ(hipErr, PI_SUCCESS); - - // There should not be any active HIP context - hipErr = hipCtxGetCurrent(¤t); - ASSERT_EQ(hipErr, PI_SUCCESS); - } while (current != nullptr); - - // The context object is passed, even if its not active it should be used - // to allocate the memory object - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj, - nullptr)), - PI_SUCCESS); - ASSERT_NE(memObj, nullptr); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); -} - -TEST_F(HipTestMemObj, piMemBufferPinnedMappedRead) { - const size_t memSize = sizeof(int); - const int value = 20; - - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context_, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - ASSERT_EQ(queue->get_context(), context_); - - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC, - memSize, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - ASSERT_EQ( - (plugin->call_nocheck( - queue, memObj, true, 0, sizeof(int), &value, 0, nullptr, nullptr)), - PI_SUCCESS); - - int *host_ptr = nullptr; - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, true, PI_MAP_READ, 0, sizeof(int), 0, nullptr, - nullptr, (void **)&host_ptr)), - PI_SUCCESS); - - ASSERT_EQ(*host_ptr, value); - - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, host_ptr, 0, nullptr, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); - plugin->call(queue); -} - -TEST_F(HipTestMemObj, piMemBufferPinnedMappedWrite) { - const size_t memSize = sizeof(int); - const int value = 30; - - pi_queue queue; - ASSERT_EQ((plugin->call_nocheck( - context_, device_, 0, &queue)), - PI_SUCCESS); - ASSERT_NE(queue, nullptr); - ASSERT_EQ(queue->get_context(), context_); - - pi_mem memObj; - ASSERT_EQ((plugin->call_nocheck( - context_, PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_ALLOC, - memSize, nullptr, &memObj, nullptr)), - PI_SUCCESS); - - int *host_ptr = nullptr; - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, true, PI_MAP_WRITE, 0, sizeof(int), 0, nullptr, - nullptr, (void **)&host_ptr)), - PI_SUCCESS); - - *host_ptr = value; - - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, host_ptr, 0, nullptr, nullptr)), - PI_SUCCESS); - - int read_value = 0; - ASSERT_EQ((plugin->call_nocheck( - queue, memObj, true, 0, sizeof(int), &read_value, 0, nullptr, - nullptr)), - PI_SUCCESS); - - ASSERT_EQ(read_value, value); - - ASSERT_EQ((plugin->call_nocheck(memObj)), - PI_SUCCESS); - plugin->call(queue); -} diff --git a/sycl/unittests/pi/hip/test_primary_context.cpp b/sycl/unittests/pi/hip/test_primary_context.cpp deleted file mode 100644 index 1ed80159d32d9..0000000000000 --- a/sycl/unittests/pi/hip/test_primary_context.cpp +++ /dev/null @@ -1,85 +0,0 @@ -//==---------- test_primary_context.cpp - PI unit tests --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlatforms.hpp" -#include -#include -#include - -#include - -using namespace sycl; - -struct HipPrimaryContextTests : public ::testing::TestWithParam { - -protected: - device deviceA_; - device deviceB_; - - void SetUp() override { - std::vector HipDevices = GetParam().get_devices(); - - deviceA_ = HipDevices[0]; - deviceB_ = HipDevices.size() > 1 ? HipDevices[1] : deviceA_; - } - - void TearDown() override {} -}; - -TEST_P(HipPrimaryContextTests, piSingleContext) { - std::cout << "create single context" << std::endl; - context Context(deviceA_, async_handler{}); - - hipDevice_t HipDevice = get_native(deviceA_); - hipCtx_t HipContext = get_native(Context); - - hipCtx_t PrimaryHipContext; - hipDevicePrimaryCtxRetain(&PrimaryHipContext, HipDevice); - - ASSERT_EQ(HipContext, PrimaryHipContext); - - hipDevicePrimaryCtxRelease(HipDevice); -} - -TEST_P(HipPrimaryContextTests, piMultiContextSingleDevice) { - std::cout << "create multiple contexts for one device" << std::endl; - context ContextA(deviceA_, async_handler{}); - context ContextB(deviceA_, async_handler{}); - - hipCtx_t HipContextA = get_native(ContextA); - hipCtx_t HipContextB = get_native(ContextB); - - ASSERT_EQ(HipContextA, HipContextB); -} - -TEST_P(HipPrimaryContextTests, piMultiContextMultiDevice) { - if (deviceA_ == deviceB_) - return; - - hipDevice_t HipDeviceA = get_native(deviceA_); - hipDevice_t HipDeviceB = get_native(deviceB_); - - ASSERT_NE(HipDeviceA, HipDeviceB); - - std::cout << "create multiple contexts for multiple devices" << std::endl; - context ContextA(deviceA_, async_handler{}); - context ContextB(deviceB_, async_handler{}); - - hipCtx_t HipContextA = get_native(ContextA); - hipCtx_t HipContextB = get_native(ContextB); - - ASSERT_NE(HipContextA, HipContextB); -} - -INSTANTIATE_TEST_SUITE_P( - OnHipPlatform, HipPrimaryContextTests, - ::testing::ValuesIn(pi::getPlatformsWithName("HIP BACKEND"))); diff --git a/sycl/unittests/pi/hip/test_sampler_properties.cpp b/sycl/unittests/pi/hip/test_sampler_properties.cpp deleted file mode 100644 index bed0bb9b053a4..0000000000000 --- a/sycl/unittests/pi/hip/test_sampler_properties.cpp +++ /dev/null @@ -1,135 +0,0 @@ -//==---- test_sampler_properties.cpp --- PI unit tests ---------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include "TestGetPlugin.hpp" -#include -#include -#include - -#include - -namespace { - -using namespace sycl; - -class SamplerPropertiesTest - : public ::testing::TestWithParam> { -protected: - std::optional &plugin = - pi::initializeAndGet(backend::ext_oneapi_hip); - - pi_platform platform_; - pi_device device_; - pi_context context_; - pi_sampler sampler_; - - pi_bool normalizedCoords_; - pi_sampler_filter_mode filterMode_; - pi_sampler_addressing_mode addressMode_; - - SamplerPropertiesTest() = default; - - ~SamplerPropertiesTest() override = default; - - void SetUp() override { - // skip the tests if the HIP backend is not available - if (!plugin.has_value()) { - GTEST_SKIP(); - } - - std::tie(normalizedCoords_, filterMode_, addressMode_) = GetParam(); - - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin->hasBackend(backend::ext_oneapi_hip), PI_SUCCESS); - - ASSERT_EQ((plugin->call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - numPlatforms, &platform_, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin->call_nocheck( - platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)), - PI_SUCCESS); - ASSERT_EQ((plugin->call_nocheck( - nullptr, 1, &device_, nullptr, nullptr, &context_)), - PI_SUCCESS); - EXPECT_NE(context_, nullptr); - - pi_sampler_properties sampler_properties[] = { - PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS, - static_cast(normalizedCoords_), - PI_SAMPLER_PROPERTIES_ADDRESSING_MODE, - static_cast(addressMode_), - PI_SAMPLER_PROPERTIES_FILTER_MODE, - static_cast(filterMode_), - 0}; - - ASSERT_EQ((plugin->call_nocheck( - context_, sampler_properties, &sampler_)), - PI_SUCCESS); - } - - void TearDown() override { - if (plugin.has_value()) { - plugin->call(sampler_); - plugin->call(device_); - plugin->call(context_); - } - } -}; - -TEST_P(SamplerPropertiesTest, piCheckNormalizedCoords) { - pi_bool actualNormalizedCoords = !normalizedCoords_; - - plugin->call( - sampler_, PI_SAMPLER_INFO_NORMALIZED_COORDS, sizeof(pi_bool), - &actualNormalizedCoords, nullptr); - - ASSERT_EQ(actualNormalizedCoords, normalizedCoords_); -} - -TEST_P(SamplerPropertiesTest, piCheckFilterMode) { - pi_sampler_filter_mode actualFilterMode; - - plugin->call( - sampler_, PI_SAMPLER_INFO_FILTER_MODE, sizeof(pi_sampler_filter_mode), - &actualFilterMode, nullptr); - - ASSERT_EQ(actualFilterMode, filterMode_); -} - -TEST_P(SamplerPropertiesTest, piCheckAddressingMode) { - pi_sampler_addressing_mode actualAddressMode; - - plugin->call( - sampler_, PI_SAMPLER_INFO_ADDRESSING_MODE, - sizeof(pi_sampler_addressing_mode), &actualAddressMode, nullptr); - - ASSERT_EQ(actualAddressMode, addressMode_); -} - -INSTANTIATE_TEST_SUITE_P( - SamplerPropertiesTestImpl, SamplerPropertiesTest, - ::testing::Combine( - ::testing::Values(PI_TRUE, PI_FALSE), - ::testing::Values(PI_SAMPLER_FILTER_MODE_LINEAR, - PI_SAMPLER_FILTER_MODE_NEAREST), - ::testing::Values(PI_SAMPLER_ADDRESSING_MODE_CLAMP, - PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE, - PI_SAMPLER_ADDRESSING_MODE_NONE, - PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT, - PI_SAMPLER_ADDRESSING_MODE_REPEAT))); -} // namespace