From f5178ebcf05ddd0e42c4b8c805ad809f433f6bff Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 27 Jun 2024 16:38:27 +0200 Subject: [PATCH 01/40] [SYCL] Allow raw pointers in SYCL vec load and store (#13895) In accordance with https://github.com/KhronosGroup/SYCL-Docs/pull/555 proposal, this commit allows raw pointers in the `load` and `store` member functions on `sycl::vec`. --------- Signed-off-by: Larsen, Steffen --- sycl/include/sycl/vector.hpp | 9 + sycl/include/sycl/vector_preview.hpp | 9 + sycl/test-e2e/Basic/vector/load_store.cpp | 208 ++++++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 sycl/test-e2e/Basic/vector/load_store.cpp diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index d8b573171f73c..dd4425a270059 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -985,6 +985,11 @@ template class vec { MultiPtr(Acc); load(Offset, MultiPtr); } + void load(size_t Offset, const DataT *Ptr) { + for (int I = 0; I < NumElements; ++I) + setValue(I, Ptr[Offset * NumElements + I]); + } + template void store(size_t Offset, multi_ptr Ptr) const { @@ -1004,6 +1009,10 @@ template class vec { MultiPtr(Acc); store(Offset, MultiPtr); } + void store(size_t Offset, DataT *Ptr) const { + for (int I = 0; I < NumElements; ++I) + Ptr[Offset * NumElements + I] = getValue(I); + } void ConvertToDataT() { for (size_t i = 0; i < NumElements; ++i) { diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index e96d64b8a930e..80602c0b0c636 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -572,6 +572,11 @@ class vec : public detail::vec_arith { MultiPtr(Acc); load(Offset, MultiPtr); } + void load(size_t Offset, const DataT *Ptr) { + for (int I = 0; I < NumElements; ++I) + m_Data[I] = Ptr[Offset * NumElements + I]; + } + template void store(size_t Offset, multi_ptr Ptr) const { @@ -591,6 +596,10 @@ class vec : public 
detail::vec_arith { MultiPtr(Acc); store(Offset, MultiPtr); } + void store(size_t Offset, DataT *Ptr) const { + for (int I = 0; I < NumElements; ++I) + Ptr[Offset * NumElements + I] = m_Data[I]; + } private: // fields diff --git a/sycl/test-e2e/Basic/vector/load_store.cpp b/sycl/test-e2e/Basic/vector/load_store.cpp new file mode 100644 index 0000000000000..626fd0264fb71 --- /dev/null +++ b/sycl/test-e2e/Basic/vector/load_store.cpp @@ -0,0 +1,208 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %} +// RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} + +// Tests load and store on sycl::vec. + +#include +#include +#include +#include + +namespace syclex = sycl::ext::oneapi; + +template +int CheckResult(const T0 &Actual, const T1 &Reference, const char *Category) { + int Failures = 0; + for (size_t I = 0; I < N; ++I) { + if (Actual[I] == Reference[I]) + continue; + + std::cout << "Failed at index " << I << ": " << Category << " - " + << Actual[I] << " != " << Reference[I] << std::endl; + ++Failures; + } + return Failures; +} + +template int RunTest(sycl::queue &Q) { + using ElemT = typename VecT::element_type; + + int Failures = 0; + // Load on host. + // Note: multi_ptr is not usable on host, so only raw pointer is tested. + { + const ElemT Ref[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13}; + VecT V{0}; + V.load(2, Ref); + Failures += CheckResult<4>(V, Ref + 8, "load with raw pointer on host"); + } + + // Store on host. + // Note: multi_ptr is not usable on host, so only raw pointer is tested. + { + ElemT Out[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13}; + const VecT V{4, 3, 2, 1}; + V.store(1, Out); + const ElemT Ref[] = {0, 2, 1, 4, 4, 3, 2, 1, 7, 10, 9, 12, 11, 14, 13}; + Failures += + CheckResult(Out, Ref, "store in raw pointer on host"); + } + + // Load on device. 
+ { + const ElemT Ref[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, + 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24}; + VecT V[6] = {VecT{0}}; + + { + sycl::buffer RefBuff{Ref, std::size(Ref)}; + sycl::buffer VBuff{V, std::size(V)}; + + Q.submit([&](sycl::handler &CGH) { + sycl::accessor GlobalRefAcc{RefBuff, CGH, sycl::read_only}; + sycl::accessor VAcc{VBuff, CGH, sycl::read_write}; + sycl::local_accessor LocalRefAcc{std::size(Ref), CGH}; + CGH.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { + // Initialize the local and private memory copies. + ElemT PrivateRef[std::size(Ref)] = {0}; + for (size_t I = 0; I < GlobalRefAcc.size(); ++I) { + PrivateRef[I] = GlobalRefAcc[I]; + LocalRefAcc[I] = GlobalRefAcc[I]; + } + + // Load with global multi_ptr. + auto GlobalMPtr = + GlobalRefAcc + .template get_multi_ptr(); + VAcc[0].load(0, GlobalMPtr); + + // Load with local multi_ptr. + auto LocalMPtr = + LocalRefAcc.template get_multi_ptr(); + VAcc[1].load(1, LocalMPtr); + + // Load with private multi_ptr. + auto PrivateMPtr = sycl::address_space_cast< + sycl::access::address_space::private_space, + sycl::access::decorated::no>(PrivateRef); + VAcc[2].load(2, PrivateMPtr); + + // Load with global raw pointer. + const ElemT *GlobalRawPtr = GlobalMPtr.get_raw(); + VAcc[3].load(3, GlobalRawPtr); + + // Load with local raw pointer. + const ElemT *LocalRawPtr = LocalMPtr.get_raw(); + VAcc[4].load(4, LocalRawPtr); + + // Load with private raw pointer. 
+ VAcc[5].load(5, PrivateRef); + }); + }); + } + + Failures += + CheckResult<4>(V[0], Ref, "load with global multi_ptr on device"); + Failures += + CheckResult<4>(V[1], Ref + 4, "load with local multi_ptr on device"); + Failures += + CheckResult<4>(V[2], Ref + 8, "load with private multi_ptr on device"); + Failures += CheckResult<4>(V[3], Ref + 12, + "load with global raw pointer on device"); + Failures += + CheckResult<4>(V[4], Ref + 16, "load with local raw pointer on device"); + Failures += CheckResult<4>(V[5], Ref + 20, + "load with private raw pointer on device"); + } + + // Store on device. + { + ElemT Out[24] = {0}; + const VecT V[] = {{0, 2, 1, 4}, {3, 6, 5, 8}, {7, 10, 9, 12}, + {11, 14, 13, 16}, {15, 18, 17, 20}, {19, 22, 21, 24}}; + + { + sycl::buffer OutBuff{Out, std::size(Out)}; + + Q.submit([&](sycl::handler &CGH) { + sycl::accessor OutAcc{OutBuff, CGH, sycl::read_write}; + sycl::local_accessor LocalOutAcc{std::size(Out), CGH}; + CGH.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { + ElemT PrivateVal[std::size(Out)] = {0}; + + // Store in global multi_ptr. + auto GlobalMPtr = + OutAcc.template get_multi_ptr(); + V[0].store(0, GlobalMPtr); + + // Store in local multi_ptr. + auto LocalMPtr = + LocalOutAcc.template get_multi_ptr(); + V[1].store(1, LocalMPtr); + + // Store in private multi_ptr. + auto PrivateMPtr = sycl::address_space_cast< + sycl::access::address_space::private_space, + sycl::access::decorated::no>(PrivateVal); + V[2].store(2, PrivateMPtr); + + // Store in global raw pointer. + ElemT *GlobalRawPtr = GlobalMPtr.get_raw(); + V[3].store(3, GlobalRawPtr); + + // Store in local raw pointer. + ElemT *LocalRawPtr = LocalMPtr.get_raw(); + V[4].store(4, LocalRawPtr); + + // Store in private raw pointer. + V[5].store(5, PrivateVal); + + // Write local and private results back to the global buffer. 
+ for (size_t I = 0; I < 4; ++I) { + OutAcc[4 + I] = LocalMPtr[4 + I]; + OutAcc[8 + I] = PrivateVal[8 + I]; + OutAcc[16 + I] = LocalMPtr[16 + I]; + OutAcc[20 + I] = PrivateVal[20 + I]; + } + }); + }); + } + + const ElemT Ref[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, + 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24}; + + Failures += CheckResult<4>(Out, Ref, "store in global multi_ptr on device"); + Failures += + CheckResult<4>(Out + 4, Ref + 4, "store in local multi_ptr on device"); + Failures += CheckResult<4>(Out + 8, Ref + 8, + "store in private multi_ptr on device"); + Failures += CheckResult<4>(Out + 12, Ref + 12, + "store in global raw pointer on device"); + Failures += CheckResult<4>(Out + 16, Ref + 16, + "store in local raw pointer on device"); + Failures += CheckResult<4>(Out + 20, Ref + 20, + "store in private raw pointer on device"); + } + + return Failures; +} + +int main() { + sycl::queue Q; + + int Failures = 0; + + Failures += RunTest(Q); + Failures += RunTest(Q); + Failures += RunTest>(Q); + + if (Q.get_device().has(sycl::aspect::fp16)) + Failures += RunTest(Q); + if (Q.get_device().has(sycl::aspect::fp64)) + Failures += RunTest(Q); + + return Failures; +} From 4dca8234b9ccde4ecbe5ded3f002903da3658c23 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 27 Jun 2024 07:56:07 -0700 Subject: [PATCH 02/40] [SYCL] Remove ESIMD Emulator (#13295) --- sycl/doc/GetStartedGuide.md | 27 ------------- sycl/include/sycl/backend_types.hpp | 9 +---- sycl/include/sycl/detail/pi.h | 3 +- sycl/include/sycl/detail/pi.hpp | 4 -- .../pi_win_proxy_loader.cpp | 3 -- sycl/source/CMakeLists.txt | 1 - sycl/source/backend.cpp | 2 - sycl/source/detail/config.cpp | 6 +-- sycl/source/detail/config.hpp | 2 +- sycl/source/detail/device_impl.cpp | 2 +- sycl/source/detail/pi.cpp | 5 --- sycl/source/detail/scheduler/commands.cpp | 19 ++-------- sycl/source/device_selector.cpp | 11 ------ .../esimd_emulator_device_interface.cpp | 38 ------------------- sycl/source/handler.cpp | 
38 ++----------------- sycl/source/kernel_bundle.cpp | 5 --- sycl/test/abi/sycl_symbols_linux.dump | 3 -- sycl/test/abi/sycl_symbols_windows.dump | 3 -- sycl/unittests/allowlist/ParseAllowList.cpp | 6 +-- sycl/unittests/pi/BackendString.hpp | 3 +- 20 files changed, 18 insertions(+), 172 deletions(-) delete mode 100644 sycl/source/esimd_emulator_device_interface.cpp diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index de14612ba53e5..70d180e686c02 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -12,7 +12,6 @@ and a wide range of compute accelerators such as GPU and FPGA. * [Build DPC++ toolchain with support for NVIDIA CUDA](#build-dpc-toolchain-with-support-for-nvidia-cuda) * [Build DPC++ toolchain with support for HIP AMD](#build-dpc-toolchain-with-support-for-hip-amd) * [Build DPC++ toolchain with support for HIP NVIDIA](#build-dpc-toolchain-with-support-for-hip-nvidia) - * [Build DPC++ toolchain with support for ESIMD CPU Emulation](#build-dpc-toolchain-with-support-for-esimd-cpu-emulation) * [Build DPC++ toolchain with support for runtime kernel fusion](#build-dpc-toolchain-with-support-for-runtime-kernel-fusion) * [Build DPC++ toolchain with a custom Unified Runtime](#build-dpc-toolchain-with-a-custom-unified-runtime) * [Build Doxygen documentation](#build-doxygen-documentation) @@ -302,32 +301,6 @@ as well as the CUDA Runtime API to be installed, see Currently, this has only been tried on Linux, with ROCm 4.2.0 or 4.3.0, with CUDA 11, and using a GeForce 1060 device. -### Build DPC++ toolchain with support for ESIMD CPU Emulation - -There is experimental support for DPC++ for using ESIMD CPU Emulation. - -This feature supports ESIMD CPU Emulation using CM_EMU library -[CM Emulation project](https://github.com/intel/cm-cpu-emulation). The library -package will be generated from source codes downloaded from its open source -project and installed in your deploy directory during toolchain build. 
- -To enable support for ESIMD CPU emulation, follow the instructions for the Linux -DPC++ toolchain, but add the `--enable-esimd-emulator`. - -Enabling this flag requires following packages installed. - -* Ubuntu 22.04 - * libva-dev / 2.7.0-2 - * libffi-dev / 3.3-4 - * libtool -* RHEL 8.\* - * libffi - * libffi-devel - * libva - * libva-devel - -Currently, this feature was tested and verified on Ubuntu 22.04 environment. - ### Build DPC++ toolchain with support for runtime kernel fusion Support for the experimental SYCL extension for user-driven kernel fusion at diff --git a/sycl/include/sycl/backend_types.hpp b/sycl/include/sycl/backend_types.hpp index ead8f4674d0ef..c0a274febc9e7 100644 --- a/sycl/include/sycl/backend_types.hpp +++ b/sycl/include/sycl/backend_types.hpp @@ -21,8 +21,8 @@ enum class backend : char { ext_oneapi_level_zero = 2, ext_oneapi_cuda = 3, all = 4, - ext_intel_esimd_emulator __SYCL_DEPRECATED( - "esimd emulator is no longer supported") = 5, + // No support anymore: + // ext_intel_esimd_emulator = 5, ext_oneapi_hip = 6, ext_oneapi_native_cpu = 7, }; @@ -50,9 +50,6 @@ inline std::ostream &operator<<(std::ostream &Out, backend be) { case backend::ext_oneapi_cuda: Out << "ext_oneapi_cuda"; break; - case backend::ext_intel_esimd_emulator: - Out << "ext_intel_esimd_emulator"; - break; case backend::ext_oneapi_hip: Out << "ext_oneapi_hip"; break; @@ -76,8 +73,6 @@ inline std::string_view get_backend_name_no_vendor(backend Backend) { return "level_zero"; case backend::ext_oneapi_cuda: return "cuda"; - case backend::ext_intel_esimd_emulator: - return "esimd_emulator"; case backend::ext_oneapi_hip: return "hip"; case backend::ext_oneapi_native_cpu: diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index f4e67f7ba6113..79d67791ffc8d 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -312,7 +312,8 @@ typedef enum { PI_EXT_PLATFORM_BACKEND_OPENCL = 2, ///< The backend is OpenCL 
PI_EXT_PLATFORM_BACKEND_CUDA = 3, ///< The backend is CUDA PI_EXT_PLATFORM_BACKEND_HIP = 4, ///< The backend is HIP - PI_EXT_PLATFORM_BACKEND_ESIMD = 5, ///< The backend is ESIMD + // Not supported anymore: + // PI_EXT_PLATFORM_BACKEND_ESIMD = 5, PI_EXT_PLATFORM_BACKEND_NATIVE_CPU = 6, ///< The backend is NATIVE_CPU } _pi_platform_backend; diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index 0a6713dab1096..3500c576bb599 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -69,7 +69,6 @@ bool trace(TraceLevel level); #define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" #define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "pi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "pi_hip.dll" #define __SYCL_UR_PLUGIN_NAME "pi_unified_runtime.dll" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "pi_native_cpu.dll" @@ -77,7 +76,6 @@ bool trace(TraceLevel level); #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dll" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dll" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" #define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.dll" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dll" @@ -86,7 +84,6 @@ bool trace(TraceLevel level); #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.so" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.so" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.so" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.so" #define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.so" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.so" @@ -94,7 +91,6 @@ bool trace(TraceLevel level); #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dylib" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME 
"libpi_level_zero.dylib" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dylib" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.dylib" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dylib" #define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.dylib" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dylib" diff --git a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp b/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp index 52d419c93ee05..f7561b719bd56 100644 --- a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp +++ b/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp @@ -89,7 +89,6 @@ std::wstring getCurrentDSODir() { #define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" #define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "pi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "pi_hip.dll" #define __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME "pi_unified_runtime.dll" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "pi_native_cpu.dll" @@ -97,7 +96,6 @@ std::wstring getCurrentDSODir() { #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dll" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dll" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" #define __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME "libpi_unified_runtime.dll" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dll" @@ -147,7 +145,6 @@ void preloadLibraries() { loadPlugin(__SYCL_OPENCL_PLUGIN_NAME); loadPlugin(__SYCL_LEVEL_ZERO_PLUGIN_NAME); loadPlugin(__SYCL_CUDA_PLUGIN_NAME); - loadPlugin(__SYCL_ESIMD_EMULATOR_PLUGIN_NAME); loadPlugin(__SYCL_HIP_PLUGIN_NAME); loadPlugin(__SYCL_UNIFIED_RUNTIME_PLUGIN_NAME); loadPlugin(__SYCL_NATIVE_CPU_PLUGIN_NAME); diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index d683f32d16892..7ef8ff587f0e2 100644 --- a/sycl/source/CMakeLists.txt +++ 
b/sycl/source/CMakeLists.txt @@ -261,7 +261,6 @@ set(SYCL_COMMON_SOURCES "sampler.cpp" "stream.cpp" "spirv_ops.cpp" - "esimd_emulator_device_interface.cpp" "$<$:detail/windows_pi.cpp>" "$<$,$>:detail/posix_pi.cpp>" ) diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index ed0539f266ee2..ee21740484af9 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -58,8 +58,6 @@ backend convertBackend(pi_platform_backend PiBackend) { return backend::ext_oneapi_cuda; case PI_EXT_PLATFORM_BACKEND_HIP: return backend::ext_oneapi_hip; - case PI_EXT_PLATFORM_BACKEND_ESIMD: - return backend::ext_intel_esimd_emulator; case PI_EXT_PLATFORM_BACKEND_NATIVE_CPU: return backend::ext_oneapi_native_cpu; } diff --git a/sycl/source/detail/config.cpp b/sycl/source/detail/config.cpp index f632e3c94c234..21ce89458835f 100644 --- a/sycl/source/detail/config.cpp +++ b/sycl/source/detail/config.cpp @@ -163,17 +163,15 @@ void dumpConfig() { // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST and // ONEAPI_DEVICE_SELECTOR -// TODO: Remove esimd_emulator in the next ABI breaking window. // TODO: host device type will be removed once sycl_ext_oneapi_filter_selector // is removed. 
-const std::array, 8> &getSyclBeMap() { - static const std::array, 8> SyclBeMap = { +const std::array, 7> &getSyclBeMap() { + static const std::array, 7> SyclBeMap = { {{"host", backend::host}, {"opencl", backend::opencl}, {"level_zero", backend::ext_oneapi_level_zero}, {"cuda", backend::ext_oneapi_cuda}, {"hip", backend::ext_oneapi_hip}, - {"esimd_emulator", backend::ext_intel_esimd_emulator}, {"native_cpu", backend::ext_oneapi_native_cpu}, {"*", backend::all}}}; return SyclBeMap; diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index efbdc81fb34fb..71ba0310a24e8 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -251,7 +251,7 @@ getSyclDeviceTypeMap() { // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST and // ONEAPI_DEVICE_SELECTOR -const std::array, 8> &getSyclBeMap(); +const std::array, 7> &getSyclBeMap(); // --------------------------------------- // ONEAPI_DEVICE_SELECTOR support diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 751acd385a77d..ef02558bba55a 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -363,7 +363,7 @@ bool device_impl::has(aspect Aspect) const { return is_accelerator(); case aspect::custom: return false; - // TODO: Implement this for FPGA and ESIMD emulators. + // TODO: Implement this for FPGA emulator. 
case aspect::emulated: return false; case aspect::host_debuggable: diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index e98c67ea30281..ee0ef9e26c35d 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -68,9 +68,6 @@ template void *getPluginOpaqueData(void *OpaqueDataParam) { return ReturnOpaqueData; } -template __SYCL_EXPORT void * -getPluginOpaqueData(void *); - namespace pi { static void initializePlugins(std::vector &Plugins); @@ -514,8 +511,6 @@ template const PluginPtr &getPlugin() { template __SYCL_EXPORT const PluginPtr &getPlugin(); template __SYCL_EXPORT const PluginPtr & getPlugin(); -template __SYCL_EXPORT const PluginPtr & -getPlugin(); template __SYCL_EXPORT const PluginPtr &getPlugin(); template __SYCL_EXPORT const PluginPtr &getPlugin(); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a164c455fed54..b71ce3d41882e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2992,8 +2992,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { NDRDescT &NDRDesc = ExecKernel->MNDRDesc; std::vector &Args = ExecKernel->MArgs; - if (MQueue->is_host() || (MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator)) { + if (MQueue->is_host()) { for (ArgDesc &Arg : Args) if (kernel_param_kind_t::kind_accessor == Arg.MType) { Requirement *Req = (Requirement *)(Arg.MPtr); @@ -3006,20 +3005,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Plugin->call(RawEvents.size(), &RawEvents[0]); } - if (MQueue->is_host()) { - ExecKernel->MHostKernel->call(NDRDesc, - getEvent()->getHostProfilingInfo()); - } else { - assert(MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator); - if (MEvent != nullptr) - MEvent->setHostEnqueueTime(); - MQueue->getPlugin()->call( - nullptr, - reinterpret_cast(ExecKernel->MHostKernel->getPtr()), - NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], - 
&NDRDesc.LocalSize[0], 0, nullptr, nullptr); - } + ExecKernel->MHostKernel->call(NDRDesc, + getEvent()->getHostProfilingInfo()); return PI_SUCCESS; } diff --git a/sycl/source/device_selector.cpp b/sycl/source/device_selector.cpp index 2716ae920c1e1..e857dfdf539a4 100644 --- a/sycl/source/device_selector.cpp +++ b/sycl/source/device_selector.cpp @@ -179,13 +179,6 @@ __SYCL_EXPORT int default_selector_v(const device &dev) { // The default selector doesn't reject any devices. int Score = 0; - // we give the esimd_emulator device a score of zero to prevent it from being - // chosen among other devices. The same thing is done for gpu_selector_v - // below. - if (dev.get_backend() == backend::ext_intel_esimd_emulator) { - return 0; - } - traceDeviceSelector("info::device_type::automatic"); if (dev.is_gpu()) @@ -209,10 +202,6 @@ __SYCL_EXPORT int default_selector_v(const device &dev) { __SYCL_EXPORT int gpu_selector_v(const device &dev) { int Score = detail::REJECT_DEVICE_SCORE; - if (dev.get_backend() == backend::ext_intel_esimd_emulator) { - return 0; - } - traceDeviceSelector("info::device_type::gpu"); if (dev.is_gpu()) { Score = 1000; diff --git a/sycl/source/esimd_emulator_device_interface.cpp b/sycl/source/esimd_emulator_device_interface.cpp deleted file mode 100644 index b9f065dce8552..0000000000000 --- a/sycl/source/esimd_emulator_device_interface.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//==--------------- esimd_emulator_device_interface.cpp --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \file esimdcpu_device_interface.cpp -/// Definitions for ESIMD_EMULATOR-device specific definitions. 
-/// -/// This interface is for ESIMD intrinsic emulation implementations -/// such as slm_access to access ESIMD_EMULATOR specific-support therefore -/// it has to be defined and shared as include directory -/// -/// \ingroup sycl_pi_esimd_emulator - -#include - -namespace sycl { -inline namespace _V1 { -namespace detail { - -struct ESIMDDeviceInterface { - uintptr_t version; - void *reserved; - ESIMDDeviceInterface(); -}; - -// TODO: this function is kept only for libsycl binary backward compatibility. -// Remove it when ABI breaking changes are allowed. -__SYCL_EXPORT ESIMDDeviceInterface *getESIMDDeviceInterface() { - return nullptr; -} - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 7cef9cc6ddd93..b16441e4ff146 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -279,40 +279,10 @@ event handler::finalize() { : nullptr); Result = PI_SUCCESS; } else { - if (MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator) { - // Capture the host timestamp for profiling (queue time) - if (NewEvent != nullptr) - NewEvent->setHostEnqueueTime(); - [&](auto... 
Args) { - if (MImpl->MKernelIsCooperative) { - MQueue->getPlugin() - ->call< - detail::PiApiKind::piextEnqueueCooperativeKernelLaunch>( - Args...); - } else { - MQueue->getPlugin() - ->call(Args...); - } - }(/* queue */ - nullptr, - /* kernel */ - reinterpret_cast(MHostKernel->getPtr()), - /* work_dim */ - MNDRDesc.Dims, - /* global_work_offset */ &MNDRDesc.GlobalOffset[0], - /* global_work_size */ &MNDRDesc.GlobalSize[0], - /* local_work_size */ &MNDRDesc.LocalSize[0], - /* num_events_in_wait_list */ 0, - /* event_wait_list */ nullptr, - /* event */ nullptr); - Result = PI_SUCCESS; - } else { - Result = enqueueImpKernel( - MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, - MKernelName.c_str(), RawEvents, NewEvent, nullptr, - MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); - } + Result = enqueueImpKernel( + MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, + MKernelName.c_str(), RawEvents, NewEvent, nullptr, + MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); } #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit signal only when event is created diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 32ec35dbee837..aace54af59ac2 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -312,11 +312,6 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { const detail::RTDeviceBinaryImage &Img) { const char *Target = Img.getRawData().DeviceTargetSpec; auto BE = Dev.get_backend(); - // ESIMD emulator backend is only compatible with esimd kernels. 
- if (BE == sycl::backend::ext_intel_esimd_emulator) { - pi_device_binary_property Prop = Img.getProperty("isEsimdImage"); - return (Prop && (detail::DeviceBinaryProperty(Prop).asUint32() != 0)); - } if (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64) == 0) { return (BE == sycl::backend::opencl || BE == sycl::backend::ext_oneapi_level_zero); diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index dec17d9f11fe8..f2c9e3b05d6a4 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3359,7 +3359,6 @@ _ZN4sycl3_V16detail18stringifyErrorCodeEi _ZN4sycl3_V16detail19convertChannelOrderE23_pi_image_channel_order _ZN4sycl3_V16detail19convertChannelOrderENS0_19image_channel_orderE _ZN4sycl3_V16detail19getImageElementSizeEhNS0_18image_channel_typeE -_ZN4sycl3_V16detail19getPluginOpaqueDataILNS0_7backendE5EEEPvS4_ _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm @@ -3384,7 +3383,6 @@ _ZN4sycl3_V16detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6devi _ZN4sycl3_V16detail22reduGetPreferredWGSizeERSt10shared_ptrINS1_10queue_implEEm _ZN4sycl3_V16detail22removeDuplicateDevicesERKSt6vectorINS0_6deviceESaIS3_EE _ZN4sycl3_V16detail23constructorNotificationEPvS2_NS0_6access6targetENS3_4modeERKNS1_13code_locationE -_ZN4sycl3_V16detail23getESIMDDeviceInterfaceEv _ZN4sycl3_V16detail24find_device_intersectionERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EE _ZN4sycl3_V16detail26isDeviceGlobalUsedInKernelEPKv _ZN4sycl3_V16detail27getPixelCoordLinearFiltModeENS0_3vecIfLi4EEENS0_15addressing_modeENS0_5rangeILi3EEERS3_ @@ -3400,7 +3398,6 @@ _ZN4sycl3_V16detail2pi9assertionEbPKc 
_ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE1EEERKSt10shared_ptrINS1_6pluginEEv _ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE2EEERKSt10shared_ptrINS1_6pluginEEv _ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE3EEERKSt10shared_ptrINS1_6pluginEEv -_ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE5EEERKSt10shared_ptrINS1_6pluginEEv _ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE6EEERKSt10shared_ptrINS1_6pluginEEv _ZN4sycl3_V16detail30UnsampledImageAccessorBaseHost10getAccDataEv _ZN4sycl3_V16detail30UnsampledImageAccessorBaseHost6getPtrEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index ceba4356bee64..e71f6fb06ad43 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -17,9 +17,7 @@ ??$getPlugin@$00@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ ??$getPlugin@$01@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ ??$getPlugin@$02@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ -??$getPlugin@$04@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ ??$getPlugin@$05@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ -??$getPluginOpaqueData@$04@detail@_V1@sycl@@YAPEAXPEAX@Z ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@event@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ @@ -4230,7 +4228,6 @@ ?getDeviceFromHandler@detail@_V1@sycl@@YA?AVdevice@23@AEAVhandler@23@@Z 
?getDevices@image_impl@detail@_V1@sycl@@AEAA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@6@@Z ?getDirName@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBD@Z -?getESIMDDeviceInterface@detail@_V1@sycl@@YAPEAUESIMDDeviceInterface@123@XZ ?getElemSize@AccessorBaseHost@detail@_V1@sycl@@QEBAIXZ ?getElementSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAHXZ ?getElementSize@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAHXZ diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 543436a50f96e..f38dbe4495b81 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -157,8 +157,6 @@ TEST(ParseAllowListTests, CheckMissingClosedDoubleCurlyBracesAreHandled) { } } -// TODO: Remove esimd_emulator from ExpectedValue in the next ABI -// breaking window. TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { std::string AllowList; for (const auto &SyclBe : sycl::detail::getSyclBeMap()) { @@ -171,8 +169,8 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ExpectedValue{ {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "hip"}}, {{"BackendName", "esimd_emulator"}}, - {{"BackendName", "native_cpu"}}, {{"BackendName", "*"}}}; + {{"BackendName", "hip"}}, {{"BackendName", "native_cpu"}}, + {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } diff --git a/sycl/unittests/pi/BackendString.hpp b/sycl/unittests/pi/BackendString.hpp index ea90e3ff3eb54..ff50480163208 100644 --- a/sycl/unittests/pi/BackendString.hpp +++ b/sycl/unittests/pi/BackendString.hpp @@ -12,8 +12,7 @@ inline std::string GetBackendString(const sycl::detail::PluginPtr &Plugin) { std::stringstream Str; for (sycl::backend Backend : 
{sycl::backend::opencl, sycl::backend::ext_oneapi_level_zero, - sycl::backend::ext_oneapi_cuda, sycl::backend::ext_intel_esimd_emulator, - sycl::backend::ext_oneapi_hip}) { + sycl::backend::ext_oneapi_cuda, sycl::backend::ext_oneapi_hip}) { if (Plugin->hasBackend(Backend)) { Str << Backend; } From 787670e350bdba8f74ccfaa4458d0f03234abfeb Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 27 Jun 2024 07:56:40 -0700 Subject: [PATCH 03/40] [SYCL] Remove old ABI AccessorBaseHost ctors (#13386) --- sycl/include/sycl/accessor.hpp | 13 ------------- sycl/source/accessor.cpp | 26 ------------------------- sycl/test/abi/sycl_symbols_linux.dump | 4 ---- sycl/test/abi/sycl_symbols_windows.dump | 2 -- 4 files changed, 45 deletions(-) diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index af128aa312410..1b9371769d826 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -518,19 +518,6 @@ class __SYCL_EXPORT AccessorBaseHost { AccessorBaseHost(const AccessorImplPtr &Impl) : impl{Impl} {} public: - // TODO: the following function to be removed during next ABI break window - AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, int Dims, - int ElemSize, int OffsetInBytes = 0, - bool IsSubBuffer = false, - const property_list &PropertyList = {}); - // TODO: the following function to be removed during next ABI break window - AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, int Dims, - int ElemSize, bool IsPlaceH, int OffsetInBytes = 0, - bool IsSubBuffer = false, - const property_list &PropertyList = {}); - AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, void *SYCLMemObject, int Dims, int ElemSize, size_t OffsetInBytes = 0, diff --git a/sycl/source/accessor.cpp b/sycl/source/accessor.cpp index ebd8e83b3c665..d3f1557871d95 
100644 --- a/sycl/source/accessor.cpp +++ b/sycl/source/accessor.cpp @@ -23,32 +23,6 @@ device getDeviceFromHandler(handler &cgh) { return cgh.MGraph->getDevice(); } -// TODO: the following function to be removed during next ABI break window -AccessorBaseHost::AccessorBaseHost(id<3> Offset, range<3> AccessRange, - range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, - int Dims, int ElemSize, int OffsetInBytes, - bool IsSubBuffer, - const property_list &PropertyList) { - impl = std::shared_ptr( - new AccessorImplHost(Offset, AccessRange, MemoryRange, AccessMode, - (detail::SYCLMemObjI *)SYCLMemObject, Dims, ElemSize, - false, OffsetInBytes, IsSubBuffer, PropertyList)); -} - -// TODO: the following function to be removed during next ABI break window -AccessorBaseHost::AccessorBaseHost(id<3> Offset, range<3> AccessRange, - range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, - int Dims, int ElemSize, bool IsPlaceH, - int OffsetInBytes, bool IsSubBuffer, - const property_list &PropertyList) { - impl = std::shared_ptr( - new AccessorImplHost(Offset, AccessRange, MemoryRange, AccessMode, - (detail::SYCLMemObjI *)SYCLMemObject, Dims, ElemSize, - IsPlaceH, OffsetInBytes, IsSubBuffer, PropertyList)); -} - AccessorBaseHost::AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, void *SYCLMemObject, diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index f2c9e3b05d6a4..75f36dce272ec 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3334,13 +3334,9 @@ _ZN4sycl3_V16detail16AccessorBaseHost14getAccessRangeEv _ZN4sycl3_V16detail16AccessorBaseHost14getMemoryRangeEv _ZN4sycl3_V16detail16AccessorBaseHost6getPtrEv _ZN4sycl3_V16detail16AccessorBaseHost9getOffsetEv -_ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibibRKNS0_13property_listE 
_ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibmbRKNS0_13property_listE -_ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviiibRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE -_ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibibRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibmbRKNS0_13property_listE -_ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviiibRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorImplHost6resizeEm _ZN4sycl3_V16detail16AccessorImplHostD1Ev diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index e71f6fb06ad43..2df8166c573cf 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -464,9 +464,7 @@ ??0AccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHHH_NAEBVproperty_list@23@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_K_NAEBVproperty_list@23@@Z -??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_NH4AEBVproperty_list@23@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_N_K4AEBVproperty_list@23@@Z ??0AccessorImplHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z 
??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHHH_NAEBVproperty_list@23@@Z From a9e7ece91ec2843a2b656832de6d594f970f700e Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 27 Jun 2024 07:56:49 -0700 Subject: [PATCH 04/40] [SYCL] Remove old ABI entry points from detail::stream_impl (#13387) --- sycl/source/detail/stream_impl.cpp | 11 ----------- sycl/source/detail/stream_impl.hpp | 14 -------------- sycl/source/stream.cpp | 2 +- sycl/test/abi/sycl_symbols_linux.dump | 5 ----- sycl/test/abi/sycl_symbols_windows.dump | 4 ---- .../scheduler/StreamInitDependencyOnHost.cpp | 2 +- 6 files changed, 2 insertions(+), 36 deletions(-) diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 4550b5cc26629..4ff380d7295c5 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -18,12 +18,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -stream_impl::stream_impl(size_t BufferSize, size_t MaxStatementSize, - handler &CGH) - : stream_impl(BufferSize, MaxStatementSize, {}) { - (void)CGH; -} - stream_impl::stream_impl(size_t BufferSize, size_t MaxStatementSize, const property_list &PropList) : BufferSize_(BufferSize), MaxStatementSize_(MaxStatementSize), @@ -72,10 +66,6 @@ size_t stream_impl::get_work_item_buffer_size() const { return MaxStatementSize_; } -size_t stream_impl::get_size() const { return BufferSize_; } - -size_t stream_impl::get_max_statement_size() const { return MaxStatementSize_; } - void stream_impl::initStreamHost(QueueImplPtr Queue) { // Real size of full flush buffer is saved only in buffer_impl field of // FlushBuf object. 
@@ -138,7 +128,6 @@ void stream_impl::flush(const EventImplPtr &LeadEvent) { } } -void stream_impl::flush() { flush(nullptr); } } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index 823653016c162..ab9565e0d1641 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -24,10 +24,6 @@ namespace detail { class __SYCL_EXPORT stream_impl { public: - // TODO: This constructor is unused. - // To be removed when API/ABI changes are allowed. - stream_impl(size_t BufferSize, size_t MaxStatementSize, handler &CGH); - stream_impl(size_t BufferSize, size_t MaxStatementSize, const property_list &PropList); @@ -49,20 +45,10 @@ class __SYCL_EXPORT stream_impl { // LeadEvent as well as in queue LeadEvent associated with. void flush(const EventImplPtr &LeadEvent); - // Enqueue task to copy stream buffer to the host and print the contents - // Remove during next ABI breaking window - void flush(); - size_t size() const noexcept; size_t get_work_item_buffer_size() const; - // TODO: Unusued. Remove when ABI-break is allowed. - size_t get_size() const; - - // TODO: Unusued. Remove when ABI-break is allowed. 
- size_t get_max_statement_size() const; - template bool has_property() const noexcept { return PropList_.has_property(); } diff --git a/sycl/source/stream.cpp b/sycl/source/stream.cpp index d2578bbf1f5bc..740b6de2f8b2f 100644 --- a/sycl/source/stream.cpp +++ b/sycl/source/stream.cpp @@ -56,7 +56,7 @@ stream::stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH, detail::getSyclObjImpl(GlobalFlushBuf)->PerWI = true; } -size_t stream::size() const noexcept { return impl->get_size(); } +size_t stream::size() const noexcept { return impl->size(); } size_t stream::get_work_item_buffer_size() const { return impl->get_work_item_buffer_size(); diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 75f36dce272ec..1087ae8300f69 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3236,11 +3236,8 @@ _ZN4sycl3_V16detail11stream_impl15accessGlobalBufERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl18accessGlobalOffsetERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl5flushERKSt10shared_ptrINS1_10event_implEE -_ZN4sycl3_V16detail11stream_impl5flushEv _ZN4sycl3_V16detail11stream_implC1EmmRKNS0_13property_listE -_ZN4sycl3_V16detail11stream_implC1EmmRNS0_7handlerE _ZN4sycl3_V16detail11stream_implC2EmmRKNS0_13property_listE -_ZN4sycl3_V16detail11stream_implC2EmmRNS0_7handlerE _ZN4sycl3_V16detail12buffer_plain14deleteAccPropsERKNS1_16PropWithDataKindE _ZN4sycl3_V16detail12buffer_plain14set_write_backEb _ZN4sycl3_V16detail12buffer_plain23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm @@ -3849,10 +3846,8 @@ _ZNK4sycl3_V16detail11image_plain8get_sizeEv _ZNK4sycl3_V16detail11image_plain9get_countEv _ZNK4sycl3_V16detail11image_plain9get_pitchEv _ZNK4sycl3_V16detail11image_plain9get_rangeEv -_ZNK4sycl3_V16detail11stream_impl22get_max_statement_sizeEv _ZNK4sycl3_V16detail11stream_impl25get_work_item_buffer_sizeEv 
_ZNK4sycl3_V16detail11stream_impl4sizeEv -_ZNK4sycl3_V16detail11stream_impl8get_sizeEv _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2df8166c573cf..bab063f592f99 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -685,7 +685,6 @@ ??0stream@_V1@sycl@@QEAA@_K0AEAVhandler@12@AEBVproperty_list@12@@Z ??0stream_impl@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0stream_impl@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0stream_impl@detail@_V1@sycl@@QEAA@_K0AEAVhandler@23@@Z ??0stream_impl@detail@_V1@sycl@@QEAA@_K0AEBVproperty_list@23@@Z ??0tls_code_loc_t@detail@_V1@sycl@@QEAA@AEBUcode_location@123@@Z ??0tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ @@ -4188,7 +4187,6 @@ ?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ ?find_device_intersection@detail@_V1@sycl@@YA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@@Z ?flush@stream_impl@detail@_V1@sycl@@QEAAXAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z -?flush@stream_impl@detail@_V1@sycl@@QEAAXXZ ?free@_V1@sycl@@YAXPEAXAEBVcontext@12@AEBUcode_location@detail@12@@Z ?free@_V1@sycl@@YAXPEAXAEBVqueue@12@AEBUcode_location@detail@12@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4362,7 +4360,6 @@ ?get_kernel_ids@_V1@sycl@@YA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ 
?get_kernel_ids@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_max_statement_size@stream@_V1@sycl@@QEBA_KXZ -?get_max_statement_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVdevice@45@AEBVcontext@45@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z ?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z @@ -4387,7 +4384,6 @@ ?get_root_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ?get_size@image_plain@detail@_V1@sycl@@IEBA_KXZ ?get_size@stream@_V1@sycl@@QEBA_KXZ -?get_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?get_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBAXPEBDPEAX@Z ?get_stream_mode@stream@_V1@sycl@@QEBA?AW4stream_manipulator@23@XZ ?get_successors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 18c0b3e1a8070..4564028110341 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -97,7 +97,7 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { // Emulating construction of stream object inside command group detail::StreamImplPtr StreamImpl = - std::make_shared(1024, 200, MockCGH); + std::make_shared(1024, 200, property_list{}); detail::GlobalBufAccessorT FlushBufAcc = StreamImpl->accessGlobalFlushBuf(MockCGH); MockCGH.addStream(StreamImpl); From 
65fb79e54ad5531b3f9d51236a5c753acd0509e8 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 27 Jun 2024 07:56:59 -0700 Subject: [PATCH 05/40] [SYCL] Remove unused ctors in AccessorImplHost (#13388) --- sycl/source/detail/accessor_impl.hpp | 24 ------------------------ sycl/test/abi/sycl_symbols_windows.dump | 2 -- 2 files changed, 26 deletions(-) diff --git a/sycl/source/detail/accessor_impl.hpp b/sycl/source/detail/accessor_impl.hpp index 7011e6902f0bf..b564f28d7f334 100644 --- a/sycl/source/detail/accessor_impl.hpp +++ b/sycl/source/detail/accessor_impl.hpp @@ -41,30 +41,6 @@ class Command; class __SYCL_EXPORT AccessorImplHost { public: - // TODO: Remove when ABI break is allowed. - AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, int Dims, - int ElemSize, int OffsetInBytes = 0, - bool IsSubBuffer = false, - const property_list &PropertyList = {}) - : MAccData(Offset, AccessRange, MemoryRange), MAccessMode(AccessMode), - MSYCLMemObj((detail::SYCLMemObjI *)SYCLMemObject), MDims(Dims), - MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes), - MIsSubBuffer(IsSubBuffer), MPropertyList(PropertyList), - MIsPlaceH(false) {} - - // TODO: Remove when ABI break is allowed. 
- AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, int Dims, - int ElemSize, bool IsPlaceH, int OffsetInBytes = 0, - bool IsSubBuffer = false, - const property_list &PropertyList = {}) - : MAccData(Offset, AccessRange, MemoryRange), MAccessMode(AccessMode), - MSYCLMemObj((detail::SYCLMemObjI *)SYCLMemObject), MDims(Dims), - MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes), - MIsSubBuffer(IsSubBuffer), MPropertyList(PropertyList), - MIsPlaceH(IsPlaceH) {} - AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, void *SYCLMemObject, int Dims, int ElemSize, size_t OffsetInBytes = 0, diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index bab063f592f99..7ab461d207211 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -467,9 +467,7 @@ ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_K_NAEBVproperty_list@23@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_N_K4AEBVproperty_list@23@@Z ??0AccessorImplHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHHH_NAEBVproperty_list@23@@Z ??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_K_NAEBVproperty_list@23@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_NH4AEBVproperty_list@23@@Z ??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_N_K4AEBVproperty_list@23@@Z ??0HostProfilingInfo@detail@_V1@sycl@@QEAA@XZ ??0LocalAccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VLocalAccessorImplHost@detail@_V1@sycl@@@std@@@Z From b42948487a95a31eff98d6979db327cf0939b138 Mon Sep 17 00:00:00 2001 From: 
aelovikov-intel Date: Thu, 27 Jun 2024 07:57:20 -0700 Subject: [PATCH 06/40] [SYCL] Remove `sycl::errc_for` (#13903) It had been removed from the specification in https://github.com/KhronosGroup/SYCL-Docs/pull/431. Originally introduced in https://github.com/intel/llvm/pull/4298 the implementation has never been completed, so, while technically a breaking change, no customer code can be really using it. --- sycl/include/sycl/backend.hpp | 6 ------ sycl/include/sycl/exception.hpp | 3 --- sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp | 17 ----------------- 3 files changed, 26 deletions(-) diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index 0f8ebd75c4d9b..efbc03d6a2ab6 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -59,10 +59,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -// TODO each backend can have its own custom errc enumeration -// but the details for this are not fully specified yet -enum class backend_errc : unsigned int {}; - // Convert from PI backend to SYCL backend enum backend convertBackend(pi_platform_backend PiBackend); } // namespace detail @@ -74,8 +70,6 @@ template class backend_traits { template using return_type = typename detail::BackendReturn::type; - - using errc = detail::backend_errc; }; template diff --git a/sycl/include/sycl/exception.hpp b/sycl/include/sycl/exception.hpp index 472d5b335f141..ef5c3f2d396d2 100644 --- a/sycl/include/sycl/exception.hpp +++ b/sycl/include/sycl/exception.hpp @@ -10,7 +10,6 @@ // 4.9.2 Exception Class Interface -#include // for backend #include // for cl_int #include // for __SYCL2020_DEPRECATED #include // for __SYCL_EXPORT @@ -49,8 +48,6 @@ enum class errc : unsigned int { backend_mismatch = 14, }; -template using errc_for = typename backend_traits::errc; - /// Constructs an error code using e and sycl_category() __SYCL_EXPORT std::error_code make_error_code(sycl::errc E) noexcept; diff --git 
a/sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp b/sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp index 66be50e8eb181..c93f93f3eba69 100644 --- a/sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp +++ b/sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp @@ -80,23 +80,6 @@ int main() { static_assert(!std::is_error_condition_enum::value, "errc enum should not identify as error condition"); - // Test errc_for and backends. Should compile without complaint. - constexpr int EC = 1; - sycl::backend_traits::errc someOpenCLErrCode{EC}; - sycl::errc_for anotherOpenCLErrCode{EC}; - assert(someOpenCLErrCode == anotherOpenCLErrCode); - sycl::backend_traits::errc - someL0ErrCode{EC}; - sycl::errc_for anotherL0ErrCode{EC}; - assert(someL0ErrCode == anotherL0ErrCode); - sycl::backend_traits::errc someCUDAErrCode{ - EC}; - sycl::errc_for anotherCUDAErrCode{EC}; - assert(someCUDAErrCode == anotherCUDAErrCode); - sycl::backend_traits::errc someHIPErrCode{EC}; - sycl::errc_for anotherHIPErrCode{EC}; - assert(someHIPErrCode == anotherHIPErrCode); - std::cout << "OK" << std::endl; return 0; } From 7193c262b1ee032790615a525dfea5ecfc628f5b Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 27 Jun 2024 07:59:51 -0700 Subject: [PATCH 07/40] [ABI-Break][SYCL] Remove collectives in the sub-group class (#13665) Re-lands https://github.com/intel/llvm/pull/13199 and reverts https://github.com/intel/llvm/pull/13464. --- sycl/include/sycl/sub_group.hpp | 119 -------------------------------- 1 file changed, 119 deletions(-) diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index ef44c750d9e14..c405f436a9fe4 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -643,125 +643,6 @@ struct sub_group { #endif } -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES__ - /* --- deprecated collective functions --- */ - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. 
Use " - "sycl::ext::oneapi::broadcast instead.") - EnableIfIsScalarArithmetic broadcast(T x, id<1> local_id) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::GroupBroadcast(x, local_id); -#else - (void)x; - (void)local_id; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::ext::oneapi::reduce instead.") - EnableIfIsScalarArithmetic reduce(T x, BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::calc<__spv::GroupOperation::Reduce>( - typename sycl::detail::GroupOpTag::type(), *this, x, op); -#else - (void)x; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::ext::oneapi::reduce instead.") - EnableIfIsScalarArithmetic reduce(T x, T init, BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - return op(init, reduce(x, op)); -#else - (void)x; - (void)init; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::ext::oneapi::exclusive_scan instead.") - EnableIfIsScalarArithmetic exclusive_scan(T x, BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>( - typename sycl::detail::GroupOpTag::type(), *this, x, op); -#else - (void)x; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. 
Use " - "sycl::ext::oneapi::exclusive_scan instead.") - EnableIfIsScalarArithmetic exclusive_scan(T x, T init, - BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - if (get_local_id().get(0) == 0) { - x = op(init, x); - } - T scan = exclusive_scan(x, op); - if (get_local_id().get(0) == 0) { - scan = init; - } - return scan; -#else - (void)x; - (void)init; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::ext::oneapi::inclusive_scan instead.") - EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::calc<__spv::GroupOperation::InclusiveScan>( - typename sycl::detail::GroupOpTag::type(), *this, x, op); -#else - (void)x; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. 
Use " - "sycl::ext::oneapi::inclusive_scan instead.") - EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op, - T init) const { -#ifdef __SYCL_DEVICE_ONLY__ - if (get_local_id().get(0) == 0) { - x = op(init, x); - } - return inclusive_scan(x, op); -#else - (void)x; - (void)op; - (void)init; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } -#endif // __INTEL_PREVIEW_BREAKING_CHANGES__ - linear_id_type get_group_linear_range() const { #ifdef __SYCL_DEVICE_ONLY__ return static_cast(get_group_range()[0]); From 9c9432b78174e0f487813f9e10c1bb0b2b2a20de Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Thu, 27 Jun 2024 11:43:45 -0400 Subject: [PATCH 08/40] [SPIR-V Backend][llc] Invoke SPIR-V Backend for SPIR-V generation in SYCL offloading under an option (#14262) We have added an experimental option (off by default): -fsycl-use-spirv-backend-for-spirv-gen This will cause device compilation pipeline in SYCL offloading to invoke llc (with SPIR-V target) instead of llvm-spirv to perform SPIR-V code generation. This change will result in the SPIR-V backend being built by default. Also, if spirv-val is available, a sanity check of the generated SPIR-V code will be done and any issues will be reported as warnings to the user. 
Thanks --------- Signed-off-by: Arvind Sudarsanam --- .github/workflows/sycl-linux-build.yml | 3 +- .github/workflows/sycl-windows-build.yml | 3 +- clang/include/clang/Driver/Options.td | 4 ++ clang/lib/Driver/ToolChains/Clang.cpp | 69 ++++++++++++++++--- clang/test/Driver/sycl-spirv-backend.cpp | 6 ++ .../llc_emit_spirv_val_error_as_warning.ll | 38 ++++++++++ llvm/tools/llc/llc.cpp | 38 ++++++++++ 7 files changed, 149 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/sycl-spirv-backend.cpp create mode 100644 llvm/test/tools/llc/llc_emit_spirv_val_error_as_warning.ll diff --git a/.github/workflows/sycl-linux-build.yml b/.github/workflows/sycl-linux-build.yml index 62c2eaa28bf1c..17dcc83aeb437 100644 --- a/.github/workflows/sycl-linux-build.yml +++ b/.github/workflows/sycl-linux-build.yml @@ -169,7 +169,8 @@ jobs: --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ --cmake-opt="-DLLVM_INSTALL_UTILS=ON" \ --cmake-opt="-DNATIVECPU_USE_OCK=Off" \ - --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt="-DSYCL_PI_TESTS=OFF" \ + --cmake-opt="-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=SPIRV" - name: Compile id: build run: cmake --build $GITHUB_WORKSPACE/build diff --git a/.github/workflows/sycl-windows-build.yml b/.github/workflows/sycl-windows-build.yml index 54ab197a792db..4bd537146bf31 100644 --- a/.github/workflows/sycl-windows-build.yml +++ b/.github/workflows/sycl-windows-build.yml @@ -92,7 +92,8 @@ jobs: --cmake-opt="-DCMAKE_INSTALL_PREFIX=%GITHUB_WORKSPACE%\install" ^ --cmake-opt="-DCMAKE_CXX_COMPILER_LAUNCHER=sccache" ^ --cmake-opt="-DCMAKE_C_COMPILER_LAUNCHER=sccache" ^ - --cmake-opt="-DLLVM_INSTALL_UTILS=ON" + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" ^ + --cmake-opt="-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=SPIRV" - name: Build id: build shell: bash diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e0c0c5a0899ad..12a1dd9e9dcd8 100644 --- a/clang/include/clang/Driver/Options.td +++ 
b/clang/include/clang/Driver/Options.td @@ -4051,6 +4051,10 @@ def fsycl_libspirv_path_EQ : Joined<["-"], "fsycl-libspirv-path=">, Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>, HelpText<"Path to libspirv library">; def fno_sycl_libspirv : Flag<["-"], "fno-sycl-libspirv">, Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"Disable check for libspirv">; +def fsycl_use_spirv_backend_for_spirv_gen : Flag<["-"], "fsycl-use-spirv-backend-for-spirv-gen">, + Visibility<[ClangOption, CLOption]>, Flags<[HelpHidden]>, HelpText<"Use the SPIR-V backend for SPIR-V code generation. " + "Has effect only for SPIR-based targets. It is off by default and " + "the SPIR-V LLVM Translator is used for SPIR-V code generation. (experimental)">; def fsycl_host_compiler_EQ : Joined<["-"], "fsycl-host-compiler=">, Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"Specify C++ compiler binary to perform host " diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ba052e5fe3a87..976bc1a9be6cc 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10416,6 +10416,47 @@ void OffloadDeps::ConstructJobMultipleOutputs(Compilation &C, constructJob(C, JA, Outputs, Inputs, TCArgs, LinkingOutput); } +// Utility function to gather all arguments for SPIR-V generation using the +// SPIR-V backend. This set of arguments is expected to get updated as we add +// more features/extensions to the SPIR-V backend. +static void getSPIRVBackendOpts(const llvm::opt::ArgList &TCArgs, + ArgStringList &BackendArgs) { + BackendArgs.push_back(TCArgs.MakeArgString("-filetype=obj")); + BackendArgs.push_back( + TCArgs.MakeArgString("-mtriple=spirv64-unknown-unknown")); + // TODO: Optimization level is currently forced to -O0 due to some testing + // issues. Update optimization level after testing issues are resolved. 
+ BackendArgs.push_back(TCArgs.MakeArgString("-O0")); + BackendArgs.push_back( + TCArgs.MakeArgString("--avoid-spirv-capabilities=Shader")); + BackendArgs.push_back( + TCArgs.MakeArgString("--translator-compatibility-mode")); + + // TODO: There is some overlap between the lists of extensions in SPIR-V + // backend and SPIR-V Translator. We will try to combine them when SPIR-V + // backend is ready. + std::string ExtArg("--spirv-ext="); + std::string DefaultExtArg = + "+SPV_EXT_shader_atomic_float_add,+SPV_EXT_shader_atomic_float_min_max" + ",+SPV_KHR_no_integer_wrap_decoration,+SPV_KHR_float_controls" + ",+SPV_KHR_expect_assume,+SPV_KHR_linkonce_odr"; + std::string INTELExtArg = ",+SPV_INTEL_subgroups,+SPV_INTEL_function_pointers" + ",+SPV_INTEL_arbitrary_precision_integers" + ",+SPV_INTEL_variable_length_array"; + ExtArg = ExtArg + DefaultExtArg + INTELExtArg; + + // Other args + ExtArg += ",+SPV_INTEL_bfloat16_conversion" + ",+SPV_KHR_uniform_group_instructions" + ",+SPV_INTEL_optnone" + ",+SPV_KHR_subgroup_rotate" + ",+SPV_INTEL_usm_storage_classes" + ",+SPV_EXT_shader_atomic_float16_add" + ",+SPV_KHR_bit_instructions"; + + BackendArgs.push_back(TCArgs.MakeArgString(ExtArg)); +} + // Utility function to gather all llvm-spirv options. // Not dependent on target triple. static void getNonTripleBasedSPIRVTransOpts(Compilation &C, @@ -10521,6 +10562,7 @@ static void getTripleBasedSPIRVTransOpts(Compilation &C, } // Begin SPIRVTranslator +// TODO: Add a unique 'llc' JobAction for SPIR-V backends. 
void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -10536,17 +10578,22 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, TranslatorArgs.push_back("-o"); TranslatorArgs.push_back(Output.getFilename()); + bool UseSPIRVBackend = + TCArgs.hasArg(options::OPT_fsycl_use_spirv_backend_for_spirv_gen); if (JA.isDeviceOffloading(Action::OFK_SYCL)) { const toolchains::SYCLToolChain &TC = static_cast(getToolChain()); - getNonTripleBasedSPIRVTransOpts(C, TCArgs, TranslatorArgs); - llvm::Triple Triple = TC.getTriple(); - getTripleBasedSPIRVTransOpts(C, TCArgs, Triple, TranslatorArgs); - - // Handle -Xspirv-translator - TC.TranslateTargetOpt( - Triple, TCArgs, TranslatorArgs, options::OPT_Xspirv_translator, - options::OPT_Xspirv_translator_EQ, JA.getOffloadingArch()); + if (UseSPIRVBackend) { + getSPIRVBackendOpts(TCArgs, TranslatorArgs); + } else { + getNonTripleBasedSPIRVTransOpts(C, TCArgs, TranslatorArgs); + llvm::Triple Triple = TC.getTriple(); + getTripleBasedSPIRVTransOpts(C, TCArgs, Triple, TranslatorArgs); + // Handle -Xspirv-translator + TC.TranslateTargetOpt( + Triple, TCArgs, TranslatorArgs, options::OPT_Xspirv_translator, + options::OPT_Xspirv_translator_EQ, JA.getOffloadingArch()); + } } for (auto I : Inputs) { std::string Filename(I.getFilename()); @@ -10561,8 +10608,10 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, TranslatorArgs.push_back(C.getArgs().MakeArgString(Filename)); } - auto Cmd = std::make_unique(JA, *this, ResponseFileSupport::None(), - TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), + auto ToolName = UseSPIRVBackend ? 
"llc" : getShortName(); + auto Cmd = std::make_unique( + JA, *this, ResponseFileSupport::None(), + TCArgs.MakeArgString(getToolChain().GetProgramPath(ToolName)), TranslatorArgs, std::nullopt); if (!ForeachArgs.empty()) { diff --git a/clang/test/Driver/sycl-spirv-backend.cpp b/clang/test/Driver/sycl-spirv-backend.cpp new file mode 100644 index 0000000000000..db159f6deafb5 --- /dev/null +++ b/clang/test/Driver/sycl-spirv-backend.cpp @@ -0,0 +1,6 @@ +/// +/// Tests for using SPIR-V backend for SYCL offloading +/// +// RUN: %clangxx -fsycl -fsycl-use-spirv-backend-for-spirv-gen -### %s 2>&1 | FileCheck %s + +// CHECK: llc{{.*}} "-filetype=obj" "-mtriple=spirv64-unknown-unknown" "-O0" "--avoid-spirv-capabilities=Shader" "--translator-compatibility-mode" "--spirv-ext= diff --git a/llvm/test/tools/llc/llc_emit_spirv_val_error_as_warning.ll b/llvm/test/tools/llc/llc_emit_spirv_val_error_as_warning.ll new file mode 100644 index 0000000000000..a054ef2876d8b --- /dev/null +++ b/llvm/test/tools/llc/llc_emit_spirv_val_error_as_warning.ll @@ -0,0 +1,38 @@ +// REQUIRES: spirv-val,system-linux + +; RUN: llc %s -filetype=obj -mtriple=spirv64-unknown-unknown -O0 --avoid-spirv-capabilities=Shader --translator-compatibility-mode --spirv-ext=+SPV_INTEL_function_pointers -o %t.spv 2>&1 | FileCheck -check-prefix=CHECK-WARNINGS %s + +; Check for spirv-val warnings. + +; CHECK-WARNINGS: llc: warning: SPIR-V validation started. +; CHECK-WARNINGS-DAG: error: line {{[0-9]+}}: ID '16[%_Z2f1i]' has not been defined +; CHECK-WARNINGS-DAG: llc: warning: SPIR-V validation failed. 
+ +target triple = "spirv64-unknown-unknown" + +define dso_local i32 @_Z2f1i(i32 %0) { + %2 = add nsw i32 %0, 1 + ret i32 %2 +} + +define dso_local i32 @_Z2f2i(i32 %0) { + %2 = add nsw i32 %0, 2 + ret i32 %2 +} + +define dso_local i64 @_Z3runiiPi(i32 %0, i32 %1, ptr nocapture %2) local_unnamed_addr { + %4 = icmp slt i32 %0, 10 + br i1 %4, label %5, label %7 + +5: + %6 = add nsw i32 %1, 2 + store i32 %6, ptr %2, align 4 + br label %7 + +7: + %8 = phi <2 x i64> [ , %5 ], [ , %3 ] + %9 = extractelement <2 x i64> %8, i64 0 + %10 = extractelement <2 x i64> %8, i64 1 + %11 = add nsw i64 %9, %10 + ret i64 %11 +} diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index b292f70ba89de..53fb0fb3750e4 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -44,7 +44,9 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/InitLLVM.h" +#include "llvm/Support/Path.h" #include "llvm/Support/PluginLoader.h" +#include "llvm/Support/Program.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/TimeProfiler.h" @@ -314,6 +316,21 @@ static std::unique_ptr GetOutputStream(const char *TargetName, return FDOut; } +std::string getMainExecutable(const char *Name) { + void *Ptr = (void *)(intptr_t)&getMainExecutable; + auto COWPath = sys::fs::getMainExecutable(Name, Ptr); + return sys::path::parent_path(COWPath).str(); +} + +Expected findProgram(StringRef Name, ArrayRef Paths) { + ErrorOr Path = sys::findProgramByName(Name, Paths); + if (!Path) + Path = sys::findProgramByName(Name); + if (!Path) + return ""; + return *Path; +} + // main - Entry point for the llc compiler. // int main(int argc, char **argv) { @@ -410,6 +427,27 @@ int main(int argc, char **argv) { if (RemarksFile) RemarksFile->keep(); + if (StringRef(OutputFilename).ends_with(".spv")) { + // An external tool (spirv-val) is used to validate the generated SPIR-V + // code. 
Github page: https://github.com/KhronosGroup/SPIRV-Tools + // Currently, this tool exists out-of-tree and it is the user's + // responsibility to make it available during the compilation process. + // TODO: Replace the tool invocation with an API library call when the tool + // is made available in-tree. + Expected SPIRVValPath = + findProgram("spirv-val", {getMainExecutable("spirv-val")}); + if (!SPIRVValPath || *SPIRVValPath == "") { + WithColor::warning(errs(), argv[0]) << "spirv-val not found.\n"; + return 0; + } + SmallVector CmdArgs; + CmdArgs.push_back(*SPIRVValPath); + CmdArgs.push_back(OutputFilename); + WithColor::warning(errs(), argv[0]) << "SPIR-V validation started.\n"; + if (sys::ExecuteAndWait(*SPIRVValPath, CmdArgs)) + WithColor::warning(errs(), argv[0]) << "SPIR-V validation failed.\n"; + return 0; + } return 0; } From a14689f5a1c3914bed154013a09bfb4f84ca604f Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 27 Jun 2024 08:52:23 -0700 Subject: [PATCH 09/40] [SYCL] Remove unused ctors in buffer_impl (#13390) --- sycl/source/detail/buffer_impl.hpp | 16 ---------------- sycl/test/abi/sycl_symbols_windows.dump | 2 -- 2 files changed, 18 deletions(-) diff --git a/sycl/source/detail/buffer_impl.hpp b/sycl/source/detail/buffer_impl.hpp index af4da07ffd198..9389b760e7737 100644 --- a/sycl/source/detail/buffer_impl.hpp +++ b/sycl/source/detail/buffer_impl.hpp @@ -129,22 +129,6 @@ class __SYCL_EXPORT buffer_impl final : public SYCLMemObjT { : BaseT(MemObject, SyclContext, OwnNativeHandle, std::move(AvailableEvent), std::move(Allocator)) {} - // TODO: remove the following 2 constructors when it is allowed to break ABI. 
- buffer_impl(cl_mem MemObject, const context &SyclContext, - const size_t SizeInBytes, - std::unique_ptr Allocator, - event AvailableEvent) - : buffer_impl(pi::cast(MemObject), SyclContext, - SizeInBytes, std::move(Allocator), - std::move(AvailableEvent)) {} - - buffer_impl(pi_native_handle MemObject, const context &SyclContext, - const size_t SizeInBytes, - std::unique_ptr Allocator, - event AvailableEvent) - : BaseT(MemObject, SyclContext, SizeInBytes, std::move(AvailableEvent), - std::move(Allocator)) {} - void *allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, sycl::detail::pi::PiEvent &OutEventToWait) override; diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 7ab461d207211..ad246eeffe354 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -498,12 +498,10 @@ ??0buffer_impl@detail@_V1@sycl@@QEAA@AEBV?$function@$$A6AXPEAX@Z@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z ??0buffer_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z ??0buffer_impl@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@Vevent@23@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@_KV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@Vevent@23@@Z ??0buffer_impl@detail@_V1@sycl@@QEAA@PEAX_K1AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z 
??0buffer_impl@detail@_V1@sycl@@QEAA@PEBX_K1AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z ??0buffer_impl@detail@_V1@sycl@@QEAA@_K0AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z ??0buffer_impl@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@_NVevent@23@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@_KV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@Vevent@23@@Z ??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$function@$$A6AXPEAX@Z@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z ??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@$$CBX@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z ??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z From c0eab4b9f89b0345f3fbdb1af705c23b6494766f Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 27 Jun 2024 09:01:01 -0700 Subject: [PATCH 10/40] [SYCL] Remove old ABI entry points from memory_manager.[ch]pp (#13385) --- .../source/detail/device_global_map_entry.cpp | 2 +- sycl/source/detail/memory_manager.cpp | 113 ------------------ sycl/source/detail/memory_manager.hpp | 77 ------------ sycl/test/abi/sycl_symbols_linux.dump | 11 -- sycl/test/abi/sycl_symbols_windows.dump | 11 -- 5 files changed, 1 insertion(+), 213 deletions(-) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index d55bf31ff2e3a..9400037a86adf 100644 --- 
a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -81,7 +81,7 @@ DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM( sizeof(MDeviceGlobalPtr)), QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr, std::vector{}, - &InitEvent); + &InitEvent, nullptr); NewAlloc.MInitEvent = InitEvent; } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 840f95ea7a643..37ede74578ed7 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -803,23 +803,6 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, } } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, - QueueImplPtr SrcQueue, unsigned int DimSrc, - sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, - sycl::id<3> SrcOffset, unsigned int SrcElemSize, - void *DstMem, QueueImplPtr TgtQueue, - unsigned int DimDst, sycl::range<3> DstSize, - sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, - unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { - MemoryManager::copy(SYCLMemObj, SrcMem, SrcQueue, DimSrc, SrcSize, - SrcAccessRange, SrcOffset, SrcElemSize, DstMem, TgtQueue, - DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, - DepEvents, OutEvent, nullptr); -} - void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, size_t PatternSize, const char *Pattern, unsigned int Dim, sycl::range<3> MemRange, @@ -868,18 +851,6 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, } } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - size_t PatternSize, const char *Pattern, - unsigned int Dim, sycl::range<3> Size, - sycl::range<3> Range, sycl::id<3> Offset, - unsigned int ElementSize, - std::vector 
DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { - MemoryManager::fill(SYCLMemObj, Mem, Queue, PatternSize, Pattern, Dim, Size, - Range, Offset, ElementSize, DepEvents, OutEvent, nullptr); -} - void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, access::mode AccessMode, unsigned int, sycl::range<3>, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, @@ -973,15 +944,6 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, - size_t Len, void *DstMem, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::copy_usm(SrcMem, SrcQueue, Len, DstMem, DepEvents, OutEvent, - nullptr); -} - void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, int Pattern, std::vector DepEvents, @@ -1011,15 +973,6 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - int Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, - nullptr); // OutEventImpl); -} - void MemoryManager::prefetch_usm( void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, @@ -1036,14 +989,6 @@ void MemoryManager::prefetch_usm( DepEvents.size(), DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::prefetch_usm( - void *Mem, QueueImplPtr Queue, size_t Length, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::prefetch_usm(Mem, Queue, Length, DepEvents, OutEvent, nullptr); -} - void MemoryManager::advise_usm( const void *Mem, QueueImplPtr Queue, size_t Length, pi_mem_advice 
Advice, std::vector /*DepEvents*/, @@ -1059,15 +1004,6 @@ void MemoryManager::advise_usm( Length, Advice, OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue, - size_t Length, pi_mem_advice Advice, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::advise_usm(Mem, Queue, Length, Advice, DepEvents, OutEvent, - nullptr); -} - void MemoryManager::copy_2d_usm( const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem, size_t DstPitch, size_t Width, size_t Height, @@ -1146,16 +1082,6 @@ void MemoryManager::copy_2d_usm( Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy_2d_usm( - const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem, - size_t DstPitch, size_t Width, size_t Height, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::copy_2d_usm(SrcMem, SrcPitch, Queue, DstMem, DstPitch, Width, - Height, DepEvents, OutEvent, nullptr); -} - void MemoryManager::fill_2d_usm( void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, const std::vector &Pattern, @@ -1187,16 +1113,6 @@ void MemoryManager::fill_2d_usm( Width, Height, DepEvents.size(), DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::fill_2d_usm( - void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::fill_2d_usm(DstMem, Queue, Pitch, Width, Height, Pattern, - DepEvents, OutEvent, nullptr); -} - void MemoryManager::memset_2d_usm( void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, char Value, std::vector DepEvents, @@ -1228,15 +1144,6 @@ void MemoryManager::memset_2d_usm( Height, 
DepEvents.size(), DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::memset_2d_usm( - void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - char Value, std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::memset_2d_usm(DstMem, Queue, Pitch, Width, Height, Value, - DepEvents, OutEvent, nullptr); -} - static void memcpyToDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, @@ -1391,16 +1298,6 @@ void MemoryManager::copy_to_device_global( OutEvent, OutEventImpl); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy_to_device_global( - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, - size_t NumBytes, size_t Offset, const void *SrcMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - copy_to_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes, - Offset, SrcMem, DepEvents, OutEvent, nullptr); -} - void MemoryManager::copy_from_device_global( const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, @@ -1424,16 +1321,6 @@ void MemoryManager::copy_from_device_global( DepEvents, OutEvent, OutEventImpl); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy_from_device_global( - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, - size_t NumBytes, size_t Offset, void *DstMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - copy_from_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes, - Offset, DstMem, DepEvents, OutEvent, nullptr); -} - // Command buffer methods void MemoryManager::ext_oneapi_copyD2D_cmd_buffer( sycl::detail::ContextImplPtr Context, diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 
1d2800bf9dadc..805673d40d359 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -120,17 +120,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, QueueImplPtr SrcQueue, - unsigned int DimSrc, sycl::range<3> SrcSize, - sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, - unsigned int SrcElemSize, void *DstMem, - QueueImplPtr TgtQueue, unsigned int DimDst, - sycl::range<3> DstSize, sycl::range<3> DstAccessRange, - sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); - static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, size_t PatternSize, const char *Pattern, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, @@ -139,14 +128,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - size_t PatternSize, const char *Pattern, unsigned int Dim, - sycl::range<3> Size, sycl::range<3> AccessRange, - sycl::id<3> AccessOffset, unsigned int ElementSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); - static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, access::mode AccessMode, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, @@ -165,46 +146,23 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, - void *DstMem, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void 
fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, int Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - int Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, pi_mem_advice Advice, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, - pi_mem_advice Advice, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void copy_2d_usm(const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem, size_t DstPitch, size_t Width, size_t Height, @@ -212,13 +170,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void copy_2d_usm(const void *SrcMem, size_t SrcPitch, - QueueImplPtr Queue, void *DstMem, size_t DstPitch, - size_t Width, size_t Height, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, const std::vector &Pattern, @@ -226,25 +177,12 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr 
&OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, - size_t Width, size_t Height, - const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void memset_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, char Value, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void memset_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, - size_t Width, size_t Height, char Value, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, @@ -253,14 +191,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void - copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, - QueueImplPtr Queue, size_t NumBytes, size_t Offset, - const void *SrcMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void copy_from_device_global( const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, @@ -268,13 +198,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void copy_from_device_global( - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, - size_t NumBytes, size_t Offset, void *DstMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - // Command buffer extension methods static void 
ext_oneapi_copyD2D_cmd_buffer( sycl::detail::ContextImplPtr Context, diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 1087ae8300f69..2ff3670ba94df 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3266,15 +3266,10 @@ _ZN4sycl3_V16detail12sampler_implC2ENS0_29coordinate_normalization_modeENS0_15ad _ZN4sycl3_V16detail12sampler_implC2EP11_cl_samplerRKNS0_7contextE _ZN4sycl3_V16detail12sampler_implD1Ev _ZN4sycl3_V16detail12sampler_implD2Ev -_ZN4sycl3_V16detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN4sycl3_V16detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager11copy_2d_usmEPKvmSt10shared_ptrINS1_10queue_implEEPvmmmSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN4sycl3_V16detail13MemoryManager11copy_2d_usmEPKvmSt10shared_ptrINS1_10queue_implEEPvmmmSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager11fill_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_ _ZN4sycl3_V16detail13MemoryManager11fill_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EEPS9_ _ZN4sycl3_V16detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager13memset_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmcSt6vectorIP9_pi_eventSaIS9_EEPS9_ _ZN4sycl3_V16detail13MemoryManager13memset_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmcSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE 
_ZN4sycl3_V16detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_ _ZN4sycl3_V16detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event @@ -3284,9 +3279,7 @@ _ZN4sycl3_V16detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12con _ZN4sycl3_V16detail13MemoryManager19copy_image_bindlessEPvSt10shared_ptrINS1_10queue_implEES3_RK14_pi_image_descRK16_pi_image_format20_pi_image_copy_flags22pi_image_offset_structSE_22pi_image_region_structSF_RKSt6vectorIP9_pi_eventSaISI_EEPSI_ _ZN4sycl3_V16detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE _ZN4sycl3_V16detail13MemoryManager20allocateMemSubBufferESt10shared_ptrINS1_12context_implEEPvmmNS0_5rangeILi3EEESt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager21copy_to_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmS4_RKSt6vectorIP9_pi_eventSaISA_EEPSA_ _ZN4sycl3_V16detail13MemoryManager21copy_to_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmS4_RKSt6vectorIP9_pi_eventSaISA_EEPSA_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager23copy_from_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmPvRKSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN4sycl3_V16detail13MemoryManager23copy_from_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmPvRKSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE _ZN4sycl3_V16detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event _ZN4sycl3_V16detail13MemoryManager26ext_oneapi_fill_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvmPKcjNS0_5rangeILi3EEESE_NS0_2idILi3EEEjSt6vectorIjSaIjEEPj @@ -3298,16 +3291,12 @@ 
_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_pt _ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj _ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ -_ZN4sycl3_V16detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_ _ZN4sycl3_V16detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_RKS6_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ _ZN4sycl3_V16detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_RKS6_INS1_10event_implEE _ZN4sycl3_V16detail13MemoryManager5unmapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEES5_St6vectorIP9_pi_eventSaISB_EERSB_ _ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_ 
_ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_ _ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc _ZN4sycl3_V16detail13lgamma_r_implENS1_9half_impl4halfEPi diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index ad246eeffe354..45efa2732ec3a 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3945,7 +3945,6 @@ ?addOrReplaceAccessorProperties@buffer_plain@detail@_V1@sycl@@IEAAXAEBVproperty_list@34@@Z ?addReduction@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@$$CBX@std@@@Z ?addStream@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vstream_impl@detail@_V1@sycl@@@std@@@Z -?advise_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KW4_pi_mem_advice@@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?advise_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KW4_pi_mem_advice@@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?alignedAlloc@OSUtil@detail@_V1@sycl@@SAPEAX_K0@Z ?alignedFree@OSUtil@detail@_V1@sycl@@SAXPEAX@Z @@ -4015,16 +4014,11 @@ ?convertChannelOrder@detail@_V1@sycl@@YA?AW4image_channel_order@23@W4_pi_image_channel_order@@@Z ?convertChannelType@detail@_V1@sycl@@YA?AW4_pi_image_channel_type@@W4image_channel_type@23@@Z ?convertChannelType@detail@_V1@sycl@@YA?AW4image_channel_type@23@W4_pi_image_channel_type@@@Z 
-?copy@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@IV?$range@$02@34@3V?$id@$02@34@I12I334IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?copy@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@IV?$range@$02@34@3V?$id@$02@34@I12I334IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z -?copy_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEBX_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@PEAX111V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?copy_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEBX_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@PEAX111V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?copy_from_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K3PEAXAEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?copy_from_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K3PEAXAEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?copy_image_bindless@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@0AEBU_pi_image_desc@@AEBU_pi_image_format@@W4_pi_image_copy_flags@@Upi_image_offset_struct@@5Upi_image_region_struct@@6AEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?copy_to_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K30AEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z 
?copy_to_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K30AEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?copy_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEAXV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?copy_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEAXV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4172,11 +4166,8 @@ ?ext_oneapi_wait_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uinterop_semaphore_handle@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z ?extractArgsAndReqs@handler@_V1@sycl@@AEAAXXZ ?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEAD_KPEBUkernel_param_desc_t@detail@23@_N@Z -?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z -?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z 
?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ ?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z @@ -4514,7 +4505,6 @@ ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBUcode_location@detail@23@@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KV423@AEBUcode_location@detail@23@@Z -?memset_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22DV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?memset_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22DV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?message@SYCLCategory@detail@_V1@sycl@@UEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@Z ?modf_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAV45123@@Z @@ -4534,7 +4524,6 @@ ?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KAEBUcode_location@detail@23@@Z 
?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KV423@AEBUcode_location@detail@23@@Z -?prefetch_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?prefetch_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?prepare_for_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z ?prepare_for_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVqueue@45@@Z From 58e60a5681795145486c1b224991fcbb331075ce Mon Sep 17 00:00:00 2001 From: David Garcia Orozco Date: Thu, 27 Jun 2024 12:04:38 -0400 Subject: [PATCH 11/40] [SYCL][E2E] Use callable device selector in `FilterSelector/select_device_level_zero.cpp` (#14310) Fixing test that was missed in #14162 --- .../FilterSelector/select_device_level_zero.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp b/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp index da3d34df48f37..5d962d2a51948 100644 --- a/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp +++ b/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp @@ -22,30 +22,26 @@ int main() { } { - default_selector ds; - device d = ds.select_device(); + device d(default_selector_v); string name = d.get_platform().get_info(); assert(name.find("Level-Zero") != string::npos); } { - gpu_selector gs; - device d = gs.select_device(); + device d(gpu_selector_v); string name = d.get_platform().get_info(); assert(name.find("Level-Zero") != string::npos); } { - cpu_selector cs; try { - device d = cs.select_device(); + 
device d(cpu_selector_v); cerr << "CPU device is found in error: " << d.is_cpu() << std::endl; return -1; } catch (...) { } } { - accelerator_selector as; try { - device d = as.select_device(); + device d(accelerator_selector_v); cerr << "ACC device is found in error: " << d.is_accelerator() << std::endl; } catch (...) { From 666cf66258363ba1c416d054cab38c85c04fe389 Mon Sep 17 00:00:00 2001 From: Ian Li Date: Thu, 27 Jun 2024 13:33:34 -0400 Subject: [PATCH 12/40] [SYCL] Move non device code checks outside of check_device_code (#14175) In the same vein as #14174, this PR moves test cases that do not check device code outside of the `check_device_code` folder. This is a separate PR as #14174 because the code is technically owned by another team. --- .../matrix => matrix/cuda}/matrix-nvptx-compile-query-test.cpp | 0 .../hip/matrix => matrix/hip}/compile-query-hip-gfx90a.cpp | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename sycl/test/{check_device_code/cuda/matrix => matrix/cuda}/matrix-nvptx-compile-query-test.cpp (100%) rename sycl/test/{check_device_code/hip/matrix => matrix/hip}/compile-query-hip-gfx90a.cpp (100%) diff --git a/sycl/test/check_device_code/cuda/matrix/matrix-nvptx-compile-query-test.cpp b/sycl/test/matrix/cuda/matrix-nvptx-compile-query-test.cpp similarity index 100% rename from sycl/test/check_device_code/cuda/matrix/matrix-nvptx-compile-query-test.cpp rename to sycl/test/matrix/cuda/matrix-nvptx-compile-query-test.cpp diff --git a/sycl/test/check_device_code/hip/matrix/compile-query-hip-gfx90a.cpp b/sycl/test/matrix/hip/compile-query-hip-gfx90a.cpp similarity index 100% rename from sycl/test/check_device_code/hip/matrix/compile-query-hip-gfx90a.cpp rename to sycl/test/matrix/hip/compile-query-hip-gfx90a.cpp From c60a12ef9b1aa721032967213a539ea435a95a8d Mon Sep 17 00:00:00 2001 From: Michael Toguchi Date: Thu, 27 Jun 2024 11:03:48 -0700 Subject: [PATCH 13/40] [Driver][SYCL] Restrict --coverage for SYCL device (#14312) When using -fsycl 
--coverage, we should not enable code coverage for device compilations as code coverage for device is not supported at this time. --- clang/lib/Driver/ToolChains/SYCL.cpp | 3 ++- clang/test/Driver/sycl-unsupported.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 1db4500bd3b51..39a6d6d5534e8 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1223,7 +1223,7 @@ void SYCL::x86_64::BackendCompiler::ConstructJob( // Unsupported options for device compilation // -fcf-protection, -fsanitize, -fprofile-generate, -fprofile-instr-generate // -ftest-coverage, -fcoverage-mapping, -fcreate-profile, -fprofile-arcs -// -fcs-profile-generate -forder-file-instrumentation +// -fcs-profile-generate -forder-file-instrumentation, --coverage static std::vector getUnsupportedOpts(void) { std::vector UnsupportedOpts = { options::OPT_fsanitize_EQ, @@ -1235,6 +1235,7 @@ static std::vector getUnsupportedOpts(void) { options::OPT_fno_test_coverage, options::OPT_fcoverage_mapping, options::OPT_fno_coverage_mapping, + options::OPT_coverage, options::OPT_fprofile_instr_generate, options::OPT_fprofile_instr_generate_EQ, options::OPT_fprofile_arcs, diff --git a/clang/test/Driver/sycl-unsupported.cpp b/clang/test/Driver/sycl-unsupported.cpp index ae256a2d9b60a..f1ade4801af54 100644 --- a/clang/test/Driver/sycl-unsupported.cpp +++ b/clang/test/Driver/sycl-unsupported.cpp @@ -35,6 +35,14 @@ // RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT // RUN: %clangxx -fsycl -forder-file-instrumentation -### %s 2>&1 \ // RUN: | FileCheck %s -DARCH=spir64 -DOPT=-forder-file-instrumentation +// RUN: %clangxx -fsycl --coverage -### %s 2>&1 \ +// RUN: | FileCheck %s -DARCH=spir64 -DOPT=--coverage \ +// RUN: -DOPT_CC1=-coverage-notes-file \ +// RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT +// RUN: %clang_cl -fsycl --coverage -### %s 2>&1 \ 
+// RUN: | FileCheck %s -DARCH=spir64 -DOPT=--coverage \ +// RUN: -DOPT_CC1=-coverage-notes-file \ +// RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT // Check to make sure our '-fsanitize=address' exception isn't triggered by a // different option // RUN: %clangxx -fsycl -fprofile-instr-generate=address -### %s 2>&1 \ From f4220d7cfb865ef17c0bedbcf7f94288e57210dd Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Thu, 27 Jun 2024 12:15:18 -0700 Subject: [PATCH 14/40] [NFC][SYCL] Remove unused var in CGBulitin (#13972) error: unused variable 'DecorateAddressIndex' [-Werror,-Wunused-variable] From d5eb1e52fa57d65d7cc8ab581a725a6dc1a03e24 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Fri, 28 Jun 2024 09:16:38 +0200 Subject: [PATCH 15/40] [SYCL][E2E] Reenable in_order_profiling_queue for L0 (#14328) Due to some confusion about the output from the in_order_profiling_queue test on L0, the test was disabled. However, the test can be safely reenabled for that target, while keeping it disabled for FPGA. Additionally, the failure in profiling_queue is believed to be due to the same issue, so the JIRA has been added to it and the note in in_order_profiling_queue has been updated to reflect the known information about the failure. Signed-off-by: Larsen, Steffen --- sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp | 4 ++-- sycl/test-e2e/ProfilingTag/profiling_queue.cpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp b/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp index f34be43ae9587..2b8871ef5a3b6 100644 --- a/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp +++ b/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp @@ -21,9 +21,9 @@ // https://github.com/intel/llvm/issues/14053 // UNSUPPORTED: cuda -// Fails on FPGA and level_zero too +// FPGA emulator seems to return unexpected start time for the fallback barrier. 
// https://github.com/intel/llvm/issues/14315 -// UNSUPPORTED: accelerator || level_zero +// UNSUPPORTED: accelerator #include "common.hpp" diff --git a/sycl/test-e2e/ProfilingTag/profiling_queue.cpp b/sycl/test-e2e/ProfilingTag/profiling_queue.cpp index d0da7612d4ea9..a028278ed957a 100644 --- a/sycl/test-e2e/ProfilingTag/profiling_queue.cpp +++ b/sycl/test-e2e/ProfilingTag/profiling_queue.cpp @@ -17,6 +17,7 @@ // UNSUPPORTED: hip // FPGA emulator seems to return unexpected start time for the fallback barrier. +// https://github.com/intel/llvm/issues/14315 // UNSUPPORTED: accelerator // Flaky on CUDA From df0dc3b397c13648ebecddfbbaf71cd8109cabd7 Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Fri, 28 Jun 2024 20:35:14 +0800 Subject: [PATCH 16/40] [DeviceSanitizer] Support out-of-bounds on private memory (#13935) UR: https://github.com/oneapi-src/unified-runtime/pull/1676 Instrument "__asan_mem_to_shadow" to convert private address to its shadow memory address Other steps are same with ASan on stack. 
--- libdevice/include/asan_libdevice.hpp | 6 +- libdevice/sanitizer_utils.cpp | 53 ++++++++- .../Instrumentation/AddressSanitizer.cpp | 102 ++++++++++-------- .../SPIRV/instrument_private_address_space.ll | 32 ++++++ sycl/plugins/unified_runtime/CMakeLists.txt | 12 +-- .../AddressSanitizer/common/kernel-debug.cpp | 16 ++- .../USM/parallel_no_local_size.cpp | 41 +++++++ .../private/multiple_private.cpp | 77 +++++++++++++ .../out-of-bounds/private/single_private.cpp | 32 ++++++ 9 files changed, 308 insertions(+), 63 deletions(-) create mode 100644 llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll create mode 100644 sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp create mode 100644 sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp create mode 100644 sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp diff --git a/libdevice/include/asan_libdevice.hpp b/libdevice/include/asan_libdevice.hpp index 21ddb7478173f..5f04b557e5acd 100644 --- a/libdevice/include/asan_libdevice.hpp +++ b/libdevice/include/asan_libdevice.hpp @@ -66,8 +66,8 @@ struct LocalArgsInfo { constexpr std::size_t ASAN_MAX_NUM_REPORTS = 10; struct LaunchInfo { - // Don't move this field, we use it in AddressSanitizerPass uintptr_t PrivateShadowOffset = 0; + uintptr_t PrivateShadowOffsetEnd = 0; uintptr_t LocalShadowOffset = 0; uintptr_t LocalShadowOffsetEnd = 0; @@ -82,8 +82,8 @@ constexpr unsigned ASAN_SHADOW_SCALE = 4; constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE; // Based on the observation, only the last 24 bits of the address of the private -// variable have changed, we use 31 bits(2G) to be safe. -constexpr std::size_t ASAN_PRIVATE_SIZE = 0x7fffffffULL + 1; +// variable have changed +constexpr std::size_t ASAN_PRIVATE_SIZE = 0xffffffULL + 1; // These magic values are written to shadow for better error // reporting. 
diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp index e63c634e30930..651067be69851 100644 --- a/libdevice/sanitizer_utils.cpp +++ b/libdevice/sanitizer_utils.cpp @@ -65,6 +65,9 @@ static const __SYCL_CONSTANT__ char __global_shadow_out_of_bound[] = static const __SYCL_CONSTANT__ char __local_shadow_out_of_bound[] = "[kernel] Local shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: " "%p)\n"; +static const __SYCL_CONSTANT__ char __private_shadow_out_of_bound[] = + "[kernel] Private shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: " + "%p)\n"; static const __SYCL_CONSTANT__ char __asan_print_unsupport_device_type[] = "[kernel] Unsupport device type: %d\n"; @@ -123,7 +126,7 @@ inline uptr MemToShadow_DG2(uptr addr, uint32_t as) { } if (shadow_ptr > __AsanShadowMemoryGlobalEnd) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr); } } @@ -171,7 +174,7 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { } if (shadow_ptr > __AsanShadowMemoryGlobalEnd) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr, (uptr)__AsanShadowMemoryGlobalStart); } @@ -207,13 +210,46 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { ((addr & (SLM_SIZE - 1)) >> ASAN_SHADOW_SCALE); if (shadow_ptr > shadow_offset_end) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__local_shadow_out_of_bound, addr, shadow_ptr, wg_lid, (uptr)shadow_offset); } return 0; } return shadow_ptr; + } else if (as == ADDRESS_SPACE_PRIVATE) { // private + // work-group linear id + const auto WG_LID = + __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y * + __spirv_BuiltInNumWorkgroups.z + + __spirv_BuiltInWorkgroupId.y * 
__spirv_BuiltInNumWorkgroups.z + + __spirv_BuiltInWorkgroupId.z; + + auto launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo; + const auto shadow_offset = launch_info->PrivateShadowOffset; + const auto shadow_offset_end = launch_info->PrivateShadowOffsetEnd; + + if (shadow_offset == 0) { + return 0; + } + + if (__AsanDebug) + __spirv_ocl_printf(__mem_launch_info, launch_info, + launch_info->PrivateShadowOffset, 0, + launch_info->NumLocalArgs, launch_info->LocalArgs); + + uptr shadow_ptr = shadow_offset + + ((WG_LID * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE) + + ((addr & (ASAN_PRIVATE_SIZE - 1)) >> ASAN_SHADOW_SCALE); + + if (shadow_ptr > shadow_offset_end) { + if (__asan_report_out_of_shadow_bounds()) { + __spirv_ocl_printf(__private_shadow_out_of_bound, addr, shadow_ptr, + WG_LID, (uptr)shadow_offset); + } + return 0; + } + return shadow_ptr; } return 0; @@ -233,6 +269,8 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { return shadow_ptr; } +// FIXME: OCL "O2" optimizer doesn't work well with following code +#if 0 if (__AsanDebug) { if (shadow_ptr) { if (as == ADDRESS_SPACE_PRIVATE) @@ -244,6 +282,7 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { __spirv_ocl_printf(__asan_print_shadow_value2, addr, as, shadow_ptr); } } +#endif return shadow_ptr; } @@ -606,6 +645,14 @@ ASAN_REPORT_ERROR(store, true, 16) ASAN_REPORT_ERROR_N(load, false) ASAN_REPORT_ERROR_N(store, true) +/// +/// ASAN convert memory address to shadow memory address +/// + +DEVICE_EXTERN_C_NOINLINE uptr __asan_mem_to_shadow(uptr ptr, uint32_t as) { + return MemToShadow(ptr, as); +} + /// /// ASAN initialize shdadow memory of local memory /// diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 92d72184f759a..9e893938b3432 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -179,6 +179,8 @@ const char 
kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private"; const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64"; const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable"; +const char kAsanMemToShadow[] = "__asan_mem_to_shadow"; + // Accesses sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; @@ -447,7 +449,7 @@ static cl::opt ClOverrideDestructorKind( static cl::opt ClSpirOffloadPrivates("asan-spir-privates", cl::desc("instrument private pointer"), cl::Hidden, - cl::init(false)); + cl::init(true)); static cl::opt ClSpirOffloadGlobals("asan-spir-globals", cl::desc("instrument global pointer"), @@ -820,14 +822,15 @@ struct AddressSanitizer { Value *SizeArgument, uint32_t Exp, RuntimeCallInserter &RTCI); void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI); - Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); + Value *memToShadow(Value *Shadow, IRBuilder<> &IRB, + uint32_t AddressSpace = kSpirOffloadPrivateAS); bool suppressInstrumentationSiteForDebug(int &Instrumented); bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI); bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool maybeInsertDynamicShadowAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); void instrumentSyclStaticLocalMemory(CallInst *CI); - void instrumentSyclDynamicLocalMemory(Function &F); + bool instrumentSyclDynamicLocalMemory(Function &F); GlobalVariable *GetOrCreateGlobalString(Module &M, StringRef Name, StringRef Value, @@ -899,6 +902,8 @@ struct AddressSanitizer { FunctionCallee AMDGPUAddressPrivate; int InstrumentationWithCallsThreshold; uint32_t MaxInlinePoisoningSize; + + FunctionCallee AsanMemToShadow; }; class ModuleAddressSanitizer { @@ -1067,7 +1072,7 @@ struct FunctionStackPoisoner : public InstVisitor { DIB(*F.getParent(), /*AllowUnresolved*/ false), C(ASan.C), IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), - PoisonStack(ClStack && + 
PoisonStack((ClStack || ClSpirOffloadPrivates) && !Triple(F.getParent()->getTargetTriple()).isAMDGPU()) {} bool runOnFunction() { @@ -1350,7 +1355,7 @@ static void ExtendSpirKernelArgs(Module &M, FunctionAnalysisManager &FAM) { } // Fixup all users - for (auto [F, NewF] : SpirFuncs) { + for (auto &[F, NewF] : SpirFuncs) { SmallVector Users(F->users()); for (User *U : Users) { if (auto *CI = dyn_cast(U)) { @@ -1544,13 +1549,13 @@ void AddressSanitizer::AppendDebugInfoToArgs(Instruction *InsertBefore, Args.push_back(ConstantExpr::getPointerCast(FuncNameGV, ConstASPtrTy)); } -Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { +Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB, + uint32_t AddressSpace) { if (TargetTriple.isSPIR()) { - // ((Shadow & 0xffffffff) >> 3) + __AsanShadowMemoryPrivateStart; - Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, 0xffffffff)); - Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); - Value *ShadowBase = IRB.CreateLoad(IntptrTy, AsanShadowDevicePrivate); - return IRB.CreateAdd(Shadow, ShadowBase); + return IRB.CreateCall( + AsanMemToShadow, + {Shadow, ConstantInt::get(IRB.getInt32Ty(), AddressSpace)}, + "shadow_ptr"); } // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); @@ -1619,7 +1624,7 @@ void AddressSanitizer::instrumentSyclStaticLocalMemory(CallInst *CI) { } // Instument dynamic local memory -void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { +bool AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { InstrumentationIRBuilder IRB(F.getEntryBlock().getFirstNonPHI()); // Save "__asan_launch" into local memory "__AsanLaunchInfo" @@ -1631,13 +1636,12 @@ void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { SmallVector LocalArgs; for (auto &Arg : F.args()) { Type *PtrTy = dyn_cast(Arg.getType()->getScalarType()); - // Local address space - if (PtrTy && PtrTy->getPointerAddressSpace() == 3) + if (PtrTy && 
PtrTy->getPointerAddressSpace() == kSpirOffloadLocalAS) LocalArgs.push_back(&Arg); } if (LocalArgs.empty()) - return; + return false; AllocaInst *ArgsArray = IRB.CreateAlloca( IntptrTy, ConstantInt::get(Int32Ty, LocalArgs.size()), "local_args"); @@ -1649,6 +1653,7 @@ void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { IRB.CreateCall(AsanSetShadowDynamicLocalFunc, {IRB.CreatePointerCast(ArgsArray, IntptrTy), ConstantInt::get(Int32Ty, LocalArgs.size())}); + return true; } // Instrument memset/memmove/memcpy @@ -3232,14 +3237,6 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T ArrayType::get(IRB.getInt8Ty(), 0)); if (TargetTriple.isSPIR()) { - AsanShadowDevicePrivate = - M.getOrInsertGlobal("__AsanShadowMemoryPrivateStart", IntptrTy, [&] { - return new GlobalVariable(M, IntptrTy, true, - GlobalVariable::ExternalLinkage, nullptr, - "__AsanShadowMemoryPrivateStart", nullptr, - GlobalVariable::NotThreadLocal, 1); - }); - // __asan_set_shadow_static_local( // uptr ptr, // size_t size, @@ -3263,6 +3260,9 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T GlobalVariable::ExternalLinkage, nullptr, "__AsanLaunchInfo", nullptr, GlobalVariable::NotThreadLocal, kSpirOffloadLocalAS); }); + + AsanMemToShadow = M.getOrInsertFunction(kAsanMemToShadow, IntptrTy, + IntptrTy, Type::getInt32Ty(*C)); } AMDGPUAddressShared = @@ -3391,10 +3391,6 @@ bool AddressSanitizer::instrumentFunction(Function &F, // can be passed to that intrinsic. markEscapedLocalAllocas(F); - if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { - instrumentSyclDynamicLocalMemory(F); - } - // We want to instrument every address only once per basic block (unless there // are calls between uses). 
SmallPtrSet TempsToInstrument; @@ -3514,6 +3510,11 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (ChangedStack || !NoReturnCalls.empty()) FunctionModified = true; + // We need to instrument dynamic local arguments after stack poisoner + if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { + FunctionModified |= instrumentSyclDynamicLocalMemory(F); + } + LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " " << F << "\n"); @@ -3999,32 +4000,39 @@ void FunctionStackPoisoner::processStaticAllocas() { AI->replaceAllUsesWith(NewAllocaPtr); } + auto TargetTriple = Triple(F.getParent()->getTargetTriple()); + // The left-most redzone has enough space for at least 4 pointers. - // Write the Magic value to redzone[0]. Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); - IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), - BasePlus0); - // Write the frame description constant to redzone[1]. - Value *BasePlus1 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, ASan.LongSize / 8)), - IntptrPtrTy); - GlobalVariable *StackDescriptionGlobal = - createPrivateGlobalForString(*F.getParent(), DescriptionString, - /*AllowMerging*/ true, kAsanGenPrefix); - Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); - IRB.CreateStore(Description, BasePlus1); - // Write the PC to redzone[2]. - Value *BasePlus2 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), - IntptrPtrTy); - IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); + // SPIRV doesn't use the following metadata + if (!TargetTriple.isSPIR()) { + // Write the Magic value to redzone[0]. + IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), + BasePlus0); + // Write the frame description constant to redzone[1]. 
+ Value *BasePlus1 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, ASan.LongSize / 8)), + IntptrPtrTy); + GlobalVariable *StackDescriptionGlobal = + createPrivateGlobalForString(*F.getParent(), DescriptionString, + /*AllowMerging*/ true, kAsanGenPrefix); + Value *Description = + IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); + IRB.CreateStore(Description, BasePlus1); + // Write the PC to redzone[2]. + Value *BasePlus2 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), + IntptrPtrTy); + IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); + } const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L); // Poison the stack red zones at the entry. - Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); + Value *ShadowBase = + ASan.memToShadow(LocalStackBase, IRB, kSpirOffloadPrivateAS); // As mask we must use most poisoned case: red zones and after scope. // As bytes we can use either the same or just red zones only. 
copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll new file mode 100644 index 0000000000000..bf412b8225b79 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [1 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } + +@__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p = private unnamed_addr addrspace(1) constant [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 + +define spir_func i32 @_Z3fooPii(ptr addrspace(4) %p) { +entry: + %arrayidx = getelementptr inbounds i32, ptr addrspace(4) %p, i64 0 + %0 = load i32, ptr addrspace(4) %arrayidx, align 4 + ret i32 %0 +} + +define spir_kernel void @kernel() #0 { +; CHECK-LABEL: define spir_kernel void @kernel +entry: + %p.i = alloca [4 x i32], align 4 + ; CHECK: %shadow_ptr = call i64 @__asan_mem_to_shadow(i64 %0, i32 0) + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %p.i) + call void @llvm.memcpy.p0.p1.i64(ptr align 4 %p.i, ptr addrspace(1) align 4 @__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p, i64 16, i1 false) + %arraydecay.i = getelementptr inbounds [4 x i32], ptr %p.i, i64 0, i64 0 + %0 = addrspacecast ptr %arraydecay.i to ptr addrspace(4) + %call.i = call spir_func i32 @_Z3fooPii(ptr addrspace(4) %0) + ret void +} + 
+attributes #0 = { mustprogress norecurse nounwind sanitize_address uwtable } diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index bb36a41b78ed6..0cbbcf4a574be 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -100,13 +100,11 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 396fb20498c315a526c961d7cb645b42795acd2c - # Merge: 719bb9cd e2ffea69 - # Author: Kenneth Benzie (Benie) - # Date: Thu May 23 10:53:03 2024 +0100 - # Merge pull request #1501 from RossBrunton/ross/kerneltests - # [Testing] Spec clarifications and testing updates for kernel - set(UNIFIED_RUNTIME_TAG 764b75c9087930799963a30be726ac76fcf1ac11) + # commit 58ca3a34dea0f559b8d950bdfe7d5d8a610d3a94 + # Author: Yang Zhao + # Date: Thu Jun 27 20:26:17 2024 +0800 + # [DeviceSanitizer] Support out-of-bounds on private memory (#1676) + set(UNIFIED_RUNTIME_TAG 58ca3a34dea0f559b8d950bdfe7d5d8a610d3a94) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO} diff --git a/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp b/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp index b4ae8b2b30e12..3783de97bd6c1 100644 --- a/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp +++ b/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp @@ -4,12 +4,22 @@ // RUN: env SYCL_PREFER_UR=1 UR_LAYER_ASAN_OPTIONS=debug:0 %{run} %t 2>&1 | FileCheck %s #include +/// This test is used to check enabling/disabling kernel debug message +/// We always use "[kernel]" prefix in kernel debug message + +constexpr std::size_t N = 4; +constexpr std::size_t group_size = 1; + int main() { sycl::queue Q; - int *array = sycl::malloc_device(1, Q); + int *array = sycl::malloc_device(N, Q); - Q.submit([&](sycl::handler &h) { - h.single_task([=]() { *array = 0; }); + Q.submit([&](sycl::handler &cgh) { + auto acc = 
sycl::local_accessor(group_size, cgh); + cgh.parallel_for( + sycl::nd_range<1>(N, group_size), [=](sycl::nd_item<1> item) { + array[item.get_global_id()] = acc[item.get_local_id()]; + }); }); Q.wait(); // CHECK-DEBUG: [kernel] diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp new file mode 100644 index 0000000000000..2e10143fdad8c --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp @@ -0,0 +1,41 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O0 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_HOST -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s +// RUN: %{build} %device_asan_flags -DMALLOC_SHARED -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s + +#include + +#include + +int main() { + sycl::queue Q; + constexpr std::size_t N = 12345; +#if defined(MALLOC_HOST) + auto *array = sycl::malloc_host(N, Q); +#elif defined(MALLOC_SHARED) + auto *array = sycl::malloc_shared(N, Q); +#else // defined(MALLOC_DEVICE) + auto *array = sycl::malloc_device(N, Q); +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for(sycl::range<1>(N + 1), + [=](sycl::id<1> i) { ++array[i]; }); + }); + Q.wait(); + // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM + // CHECK-HOST: ERROR: 
DeviceSanitizer: out-of-bounds-access on Host USM + // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM + // CHECK: READ of size 1 at kernel {{<.*MyKernel.*>}} LID({{.*}}, 0, 0) GID(12345, 0, 0) + // CHECK: {{ #0 .* .*parallel_no_local_size.cpp:}}[[@LINE-7]] + + sycl::free(array, Q); + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp new file mode 100644 index 0000000000000..1c8ba6915abc8 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp @@ -0,0 +1,77 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -DVAR=1 -O2 -g -o %t1 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t1 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR1 %s +// RUN: %{build} %device_asan_flags -DVAR=2 -O2 -g -o %t2 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t2 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR2 %s +// RUN: %{build} %device_asan_flags -DVAR=3 -O2 -g -o %t3 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t3 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR3 %s +// RUN: %{build} %device_asan_flags -DVAR=4 -O2 -g -o %t4 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t4 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR4 %s +// RUN: %{build} %device_asan_flags -DVAR=5 -O2 -g -o %t5 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t5 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR5 %s +// RUN: %{build} %device_asan_flags -DVAR=6 -O2 -g -o %t6 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t6 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR6 %s + +#include +#include + +// CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Private Memory +template __attribute__((noinline)) T foo(T *p) { return *p; } +template __attribute__((noinline)) T foo1(T *p) { return *p; } +// CHECK-VAR1: READ of size 2 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR1: #0 {{.*}} 
{{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo2(T *p) { return *p; } +// CHECK-VAR2: READ of size 2 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR2: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo3(T *p) { return *p; } +// CHECK-VAR3: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR3: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo4(T *p) { return *p; } +// CHECK-VAR4: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR4: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo5(T *p) { return *p; } +// CHECK-VAR5: READ of size 8 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR5: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo6(T *p) { return *p; } +// CHECK-VAR6: READ of size 1 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR6: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_device(5, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + short p1[] = {1}; + int p2[] = {1}; + int p3[10] = {8, 1, 10, 1, 0, 10}; + long p4[] = {5111LL}; + char p5[] = {'c'}; + + array[0] = foo(&p1[0]); + array[1] = foo(&p2[0]); + for (int i = 0; i < 10; ++i) + array[2] += foo(&p3[i]); + array[3] = foo(&p4[0]); + array[4] = foo(&p5[0]); + +#if VAR == 1 + array[0] = foo1(&p1[-4]); +#elif VAR == 2 + array[0] = foo2(&p1[4]); +#elif VAR == 3 + array[0] = foo3(&p2[1]); +#elif VAR == 4 + array[0] = foo4(&p3[10]); +#elif VAR == 5 + array[0] = foo5(&p4[1]); +#else + array[0] = foo6(&p5[1]); +#endif + }); + }); + Q.wait(); + sycl::free(array, Q); + + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp 
b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp new file mode 100644 index 0000000000000..50f1f763233d6 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp @@ -0,0 +1,32 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -O0 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s +// RUN: %{build} %device_asan_flags -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s +// RUN: %{build} %device_asan_flags -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s + +#include +#include + +__attribute__((noinline)) int foo(int p[], int i) { return p[i]; } +// CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Private Memory +// CHECK: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID({{.*}}, 0, 0) +// CHECK: #0 {{.*}} {{.*single_private.cpp}}:[[@LINE-3]] + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_device(1, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + int p[] = {1, 2, 3, 4}; + for (int i = 0; i < 5; ++i) + array[0] = foo(p, i); + }); + }); + Q.wait(); + sycl::free(array, Q); + + return 0; +} From c9842c1a78bb85149d96654da6e1ae85c2e86624 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Fri, 28 Jun 2024 07:40:32 -0700 Subject: [PATCH 17/40] [SYCL] Remove deprecated shuffles from the sub-group class (#13666) Re-lands https://github.com/intel/llvm/pull/13236 and reverts https://github.com/intel/llvm/pull/13463. 
--- sycl/include/sycl/sub_group.hpp | 58 ----- sycl/test-e2e/SubGroup/generic-shuffle.cpp | 239 --------------------- sycl/test-e2e/SubGroup/shuffle.cpp | 54 ----- sycl/test-e2e/SubGroup/shuffle_fp16.cpp | 26 --- sycl/test-e2e/SubGroup/shuffle_fp64.cpp | 25 --- 5 files changed, 402 deletions(-) delete mode 100644 sycl/test-e2e/SubGroup/generic-shuffle.cpp delete mode 100644 sycl/test-e2e/SubGroup/shuffle.cpp delete mode 100644 sycl/test-e2e/SubGroup/shuffle_fp16.cpp delete mode 100644 sycl/test-e2e/SubGroup/shuffle_fp64.cpp diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index c405f436a9fe4..f80b0876a65a3 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -209,64 +209,6 @@ struct sub_group { #endif } - template - using EnableIfIsScalarArithmetic = - std::enable_if_t::value, T>; - - /* --- one-input shuffles --- */ - /* indices in [0 , sub_group size) */ - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle(T x, id_type local_id) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::Shuffle(*this, x, local_id); -#else - (void)x; - (void)local_id; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_down(T x, uint32_t delta) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleDown(*this, x, delta); -#else - (void)x; - (void)delta; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_up(T x, uint32_t delta) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleUp(*this, x, delta); -#else - (void)x; - (void)delta; - throw 
sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_xor(T x, id_type value) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleXor(*this, x, value); -#else - (void)x; - (void)value; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - /* --- sub_group load/stores --- */ /* these can map to SIMD or block read/write hardware where available */ #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/test-e2e/SubGroup/generic-shuffle.cpp b/sycl/test-e2e/SubGroup/generic-shuffle.cpp deleted file mode 100644 index cb9f1a720b1b7..0000000000000 --- a/sycl/test-e2e/SubGroup/generic-shuffle.cpp +++ /dev/null @@ -1,239 +0,0 @@ -// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out -// RUN: %{run} %t.out -// -//==-- generic_shuffle.cpp - SYCL sub_group generic shuffle test *- C++ -*--==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "helper.hpp" -#include -#include -#include -template class pointer_kernel; - -using namespace sycl; - -template -void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { - try { - nd_range<1> NdRange(G, L); - buffer buf(G); - buffer buf_up(G); - buffer buf_down(G); - buffer buf_xor(G); - buffer sgsizebuf(1); - Queue.submit([&](handler &cgh) { - auto acc = buf.template get_access(cgh); - auto acc_up = buf_up.template get_access(cgh); - auto acc_down = - buf_down.template get_access(cgh); - auto acc_xor = buf_xor.template get_access(cgh); - auto sgsizeacc = sgsizebuf.get_access(cgh); - - cgh.parallel_for( - NdRange, [=](nd_item<1> NdItem) { - sycl::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; - - T *ptr = static_cast(0x0) + wggid; - - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(ptr, SG.get_max_local_range()[0] / 2); - - /* Save GID-SGID */ - acc_up[NdItem.get_global_id()] = SG.shuffle_up(ptr, sgid); - - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid); - - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]); - }); - }); - host_accessor acc(buf); - host_accessor acc_up(buf_up); - host_accessor acc_down(buf_down); - host_accessor acc_xor(buf_xor); - host_accessor sgsizeacc(sgsizebuf); - - size_t sg_size = sgsizeacc[0]; - int SGid = 0; - int SGLid = 0; - int SGBeginGid = 0; - for (int j = 0; j < G; j++) { - if (j % L % sg_size == 0) { - SGid++; - SGLid = 0; - SGBeginGid = j; - } - if (j % L == 0) { - SGid = 0; - SGLid = 0; - SGBeginGid = j; - } - - /*GID of middle element in every 
subgroup*/ - exit_if_not_equal(acc[j], - static_cast(0x0) + - (j / L * L + SGid * sg_size + sg_size / 2), - "shuffle"); - - /* Value GID+SGID for all element except last SGID in SG*/ - if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) { - exit_if_not_equal(acc_down[j], static_cast(0x0) + (j + SGid), - "shuffle_down"); - } - - /* Value GID-SGID for all element except first SGID in SG*/ - if (j % L % sg_size >= SGid) { - exit_if_not_equal(acc_up[j], static_cast(0x0) + (j - SGid), - "shuffle_up"); - } - - /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - exit_if_not_equal(acc_xor[j], - static_cast(0x0) + - (SGBeginGid + (SGLid ^ (SGid % sg_size))), - "shuffle_xor"); - SGLid++; - } - } catch (exception e) { - std::cout << "SYCL exception caught: " << e.what(); - exit(1); - } -} - -template -void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) { - - // Fill a vector with values that will be shuffled - std::vector values(G); - std::generate(values.begin(), values.end(), Gen); - - try { - nd_range<1> NdRange(G, L); - buffer buf(G); - buffer buf_up(G); - buffer buf_down(G); - buffer buf_xor(G); - buffer sgsizebuf(1); - buffer buf_in(values.data(), values.size()); - Queue.submit([&](handler &cgh) { - auto acc = buf.template get_access(cgh); - auto acc_up = buf_up.template get_access(cgh); - auto acc_down = - buf_down.template get_access(cgh); - auto acc_xor = buf_xor.template get_access(cgh); - auto sgsizeacc = sgsizebuf.get_access(cgh); - auto in = buf_in.template get_access(cgh); - - cgh.parallel_for( - NdRange, [=](nd_item<1> NdItem) { - sycl::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; - - T val = in[wggid]; - - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(val, SG.get_max_local_range()[0] / 2); - - /* Save GID-SGID */ - 
acc_up[NdItem.get_global_id()] = SG.shuffle_up(val, sgid); - - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid); - - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]); - }); - }); - host_accessor acc(buf); - host_accessor acc_up(buf_up); - host_accessor acc_down(buf_down); - host_accessor acc_xor(buf_xor); - host_accessor sgsizeacc(sgsizebuf); - - size_t sg_size = sgsizeacc[0]; - int SGid = 0; - int SGLid = 0; - int SGBeginGid = 0; - for (int j = 0; j < G; j++) { - if (j % L % sg_size == 0) { - SGid++; - SGLid = 0; - SGBeginGid = j; - } - if (j % L == 0) { - SGid = 0; - SGLid = 0; - SGBeginGid = j; - } - - /*GID of middle element in every subgroup*/ - exit_if_not_equal( - acc[j], values[j / L * L + SGid * sg_size + sg_size / 2], "shuffle"); - - /* Value GID+SGID for all element except last SGID in SG*/ - if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) { - exit_if_not_equal(acc_down[j], values[j + SGid], "shuffle_down"); - } - - /* Value GID-SGID for all element except first SGID in SG*/ - if (j % L % sg_size >= SGid) { - exit_if_not_equal(acc_up[j], values[j - SGid], "shuffle_up"); - } - - /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - exit_if_not_equal(acc_xor[j], - values[SGBeginGid + (SGLid ^ (SGid % sg_size))], - "shuffle_xor"); - SGLid++; - } - } catch (exception e) { - std::cout << "SYCL exception caught: " << e.what(); - exit(1); - } -} - -int main() { - queue Queue; - - // Test shuffle of pointer types - check_pointer(Queue); - - // Test shuffle of non-native types - auto ComplexFloatGenerator = [state = std::complex(0, 1)]() mutable { - return state += std::complex(2, 2); - }; - check_struct>( - Queue, ComplexFloatGenerator); - - if (Queue.get_device().has(sycl::aspect::fp64)) { - auto ComplexDoubleGenerator = [state = - std::complex(0, 1)]() mutable { - return state += std::complex(2, 2); - }; - 
check_struct>( - Queue, ComplexDoubleGenerator); - } else { - std::cout << "fp64 tests were skipped due to the device not supporting the " - "aspect."; - } - - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/shuffle.cpp b/sycl/test-e2e/SubGroup/shuffle.cpp deleted file mode 100644 index 2baf624ce804c..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -//==------------ shuffle.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - - // Check long long and unsigned long long because they differ from - // long and unsigned long according to C++ rules even if they have the same - // size at some system. - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." 
<< std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/shuffle_fp16.cpp b/sycl/test-e2e/SubGroup/shuffle_fp16.cpp deleted file mode 100644 index b2fea85d0126f..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle_fp16.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// REQUIRES: aspect-fp16 -// REQUIRES: gpu - -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/shuffle_fp64.cpp b/sycl/test-e2e/SubGroup/shuffle_fp64.cpp deleted file mode 100644 index 1440de1d25a8a..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle_fp64.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// REQUIRES: aspect-fp64 -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// -//==------- shuffle_fp64.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." 
<< std::endl; - return 0; -} From b51ebb818f2b54e3e210c31c83407d493d6e655b Mon Sep 17 00:00:00 2001 From: David Garcia Orozco Date: Fri, 28 Jun 2024 11:54:10 -0400 Subject: [PATCH 18/40] [SYCL][E2E] Fix deprecated warnings in `Regression` e2e tests (#14168) --- .../Regression/commandlist/Inputs/main.cpp | 17 +--------------- sycl/test-e2e/Regression/device_num.cpp | 20 ++++++++----------- .../device_pci_address_bdf_format.cpp | 2 +- sycl/test-e2e/Regression/group.cpp | 2 +- sycl/test-e2e/Regression/image_access.cpp | 6 +++--- 5 files changed, 14 insertions(+), 33 deletions(-) diff --git a/sycl/test-e2e/Regression/commandlist/Inputs/main.cpp b/sycl/test-e2e/Regression/commandlist/Inputs/main.cpp index 83c76afe0dd4b..c80ea80e45335 100644 --- a/sycl/test-e2e/Regression/commandlist/Inputs/main.cpp +++ b/sycl/test-e2e/Regression/commandlist/Inputs/main.cpp @@ -44,7 +44,6 @@ int main(int argc, char *argv[]) { int nthreadsGPU = 8; int arr_size = 20; int iter_gpu = 200; - unsigned int gpu_dev = 999; unsigned int nitems = 0; bool passed = true; @@ -73,21 +72,7 @@ int main(int argc, char *argv[]) { cout << " iter GPU: " << iter_gpu << "\n"; #endif - std::vector dlist; - if (gpu_dev == 999) { - try { - auto sel = sycl::gpu_selector(); - sel_dev = sel.select_device(); - } catch (...) 
{ - cout << "no gpu device found\n"; - } - } else { - if (gpu_dev > dlist.size() - 1) { - cout << "ERROR: selected device index [" << gpu_dev << "] is too large\n"; - exit(1); - } - sel_dev = dlist[gpu_dev]; - } + sel_dev = sycl::device(sycl::default_selector_v); std::cout << "selected dev: " << sel_dev.get_info() << "\n"; diff --git a/sycl/test-e2e/Regression/device_num.cpp b/sycl/test-e2e/Regression/device_num.cpp index 5feaf110bdfcc..cc70ea5295406 100644 --- a/sycl/test-e2e/Regression/device_num.cpp +++ b/sycl/test-e2e/Regression/device_num.cpp @@ -175,27 +175,24 @@ int main() { targetDevIndex = GetPreferredDeviceIndex(devices, info::device_type::all); assert(targetDevIndex >= 0 && "Failed to find target device for default selector."); - default_selector ds; - device d = ds.select_device(); - std::cout << "default_selector selected "; + device d(default_selector_v); + std::cout << "default_selector_v selected "; printDeviceType(d); assert(devices[targetDevIndex] == d && "The selected device is not the target device specified."); } targetDevIndex = GetPreferredDeviceIndex(devices, info::device_type::gpu); if (targetDevIndex >= 0) { - gpu_selector gs; - device d = gs.select_device(); - std::cout << "gpu_selector selected "; + device d(gpu_selector_v); + std::cout << "gpu_selector_v selected "; printDeviceType(d); assert(devices[targetDevIndex] == d && "The selected device is not the target device specified."); } targetDevIndex = GetPreferredDeviceIndex(devices, info::device_type::cpu); if (targetDevIndex >= 0) { - cpu_selector cs; - device d = cs.select_device(); - std::cout << "cpu_selector selected "; + device d(cpu_selector_v); + std::cout << "cpu_selector_v selected "; printDeviceType(d); assert(devices[targetDevIndex] == d && "The selected device is not the target device specified."); @@ -203,9 +200,8 @@ int main() { targetDevIndex = GetPreferredDeviceIndex(devices, info::device_type::accelerator); if (targetDevIndex >= 0) { - accelerator_selector as; - 
device d = as.select_device(); - std::cout << "accelerator_selector selected "; + device d(accelerator_selector_v); + std::cout << "accelerator_selector_v selected "; printDeviceType(d); assert(devices[targetDevIndex] == d && "The selected device is not the target device specified."); diff --git a/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp b/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp index 79ef8c29caa1e..b44ab15c214c0 100644 --- a/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp +++ b/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) { continue; std::string PCIAddress = - dev.get_info(); + dev.get_info(); std::cout << "PCI address = " << PCIAddress << std::endl; assert(std::regex_match(PCIAddress, ExpectedBDFFormat)); } diff --git a/sycl/test-e2e/Regression/group.cpp b/sycl/test-e2e/Regression/group.cpp index 6e528dccb695c..d906c3500a095 100644 --- a/sycl/test-e2e/Regression/group.cpp +++ b/sycl/test-e2e/Regression/group.cpp @@ -111,7 +111,7 @@ bool group__get_linear_id() { cgh.parallel_for( nd_range<3>{GlobalRange, LocalRange}, [=](nd_item I) { const auto Off = I.get_global_linear_id() * 3; - const auto LI = I.get_group().get_linear_id(); + const auto LI = I.get_group().get_group_linear_id(); Ptr[Off + 0] = LI; Ptr[Off + 1] = LI; Ptr[Off + 2] = LI; diff --git a/sycl/test-e2e/Regression/image_access.cpp b/sycl/test-e2e/Regression/image_access.cpp index 347430c406e37..7ade1b4fb3b4b 100644 --- a/sycl/test-e2e/Regression/image_access.cpp +++ b/sycl/test-e2e/Regression/image_access.cpp @@ -21,13 +21,13 @@ int main() { try { sycl::range<1> Range(32); - std::vector Data(Range.size() * 4, 0.0f); + std::vector Data(Range.size() * 4, 0.0f); sycl::image<1> Image(Data.data(), sycl::image_channel_order::rgba, sycl::image_channel_type::fp32, Range); sycl::queue Queue; Queue.submit([&](sycl::handler &CGH) { - sycl::accessor A(Image, CGH); @@ -35,7 +35,7 @@ int main() { }); 
Queue.wait_and_throw(); - sycl::accessor A(Image); From 94f6b2f567e5161b769e31222d7cb0197f0f0ee9 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Fri, 28 Jun 2024 12:07:32 -0400 Subject: [PATCH 19/40] [SYCL][thinLTO] Seperate module properties and symbol table generation into IR-based analysis (#14220) Based on feedback from https://github.com/intel/llvm/pull/14197, I separated out the code that generates the module properties and symbol table into separate functions that can be called by anyone, and just looks at the IR and entry points. For now, we still call it inside `sycl-post-link` because we still support the old offloading model, but once we drop support for that we can drop this responsibility from sycl-post-link and only compute it inside `clang-linker-wrapper`, both for normal compilation and thinLTO. In a (hopefully soon) future PR I plan to call these functions from `clang-linker-wrapper` when compiling for thinLTO, which we need because we will split early. Most of this change should be NFC(I). The expected changes are: 1) New option to sycl-post-link to generate the properties file 2) Driver change to NOT pass the option from 1) in thinLTO mode 3) Two minor changes in logic from properties generation, I've called these out inline. 
--------- Signed-off-by: Sarnie, Nick --- clang/lib/Driver/ToolChains/Clang.cpp | 12 +- .../test/Driver/sycl-linker-wrapper-image.cpp | 2 +- clang/test/Driver/sycl-lto.cpp | 1 + .../Driver/sycl-offload-intelfpga-emu.cpp | 2 +- .../Driver/sycl-offload-intelfpga-link.cpp | 14 +- clang/test/Driver/sycl-offload-new-driver.c | 2 +- .../Driver/sycl-post-link-options-win.cpp | 2 +- clang/test/Driver/sycl-post-link-options.cpp | 6 +- .../SYCLLowerIR/ComputeModuleRuntimeInfo.h | 45 ++ .../include/llvm/SYCLLowerIR/ModuleSplitter.h | 3 +- .../llvm/SYCLLowerIR}/SYCLDeviceLibReqMask.h | 0 .../llvm/SYCLLowerIR/SYCLDeviceRequirements.h | 10 +- .../SYCLLowerIR}/SYCLKernelParamOptInfo.h | 2 + .../llvm/SYCLLowerIR}/SpecConstants.h | 0 .../llvm/SYCLLowerIR}/Support.h | 0 llvm/lib/SYCLLowerIR/CMakeLists.txt | 4 + .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 379 +++++++++++++++++ llvm/lib/SYCLLowerIR/ModuleSplitter.cpp | 8 +- .../SYCLLowerIR}/SYCLDeviceLibReqMask.cpp | 4 +- .../SYCLLowerIR/SYCLDeviceRequirements.cpp | 7 +- .../SYCLLowerIR}/SYCLKernelParamOptInfo.cpp | 2 +- .../SYCLLowerIR}/SpecConstants.cpp | 4 +- .../assert/indirect-with-split-2.ll | 2 +- .../assert/indirect-with-split.ll | 2 +- .../tools/sycl-post-link/assert/property-1.ll | 8 +- .../tools/sycl-post-link/assert/property-2.ll | 2 +- .../device-code-split/auto-module-split-1.ll | 2 +- .../device-code-split/auto-module-split-2.ll | 2 +- .../device-code-split/auto-module-split-3.ll | 2 +- .../auto-module-split-func-ptr.ll | 2 +- .../device-code-split/basic-module-split.ll | 2 +- .../complex-indirect-call-chain.ll | 6 +- .../one-kernel-per-module.ll | 2 +- .../device-code-split/per-aspect-split-1.ll | 6 +- .../device-code-split/per-aspect-split-2.ll | 2 +- .../device-code-split/per-aspect-split-3.ll | 2 +- .../device-code-split/per-aspect-split-4.ll | 2 +- .../device-code-split/per-joint-matrix-1.ll | 6 +- .../device-code-split/per-joint-matrix-2.ll | 6 +- .../device-code-split/per-joint-matrix-3.ll | 2 +- 
.../per-joint-matrix-mad-1.ll | 6 +- .../per-joint-matrix-mad-2.ll | 6 +- .../per-joint-matrix-mad-3.ll | 2 +- .../per-joint-matrix-mad-4.ll | 6 +- .../per-joint-matrix-mad-5.ll | 6 +- .../per-reqd-sub-group-size-split-1.ll | 6 +- .../per-reqd-sub-group-size-split-2.ll | 2 +- .../per-reqd-wg-size-split-1.ll | 6 +- .../per-reqd-wg-size-split-2.ll | 2 +- .../per-reqd-wg-size-split-3.ll | 2 +- .../device-code-split/split-with-func-ptrs.ll | 6 +- .../split-with-kernel-declarations.ll | 4 +- .../device-code-split/vtable.ll | 2 +- .../device-globals/test_global_variable.ll | 2 +- .../test_global_variable_drop_used.ll | 2 +- ...st_global_variable_drop_used_opaque_ptr.ll | 2 +- ...bal_variable_many_kernels_in_one_module.ll | 2 +- ...bal_variable_many_modules_no_dev_global.ll | 2 +- ..._variable_many_modules_no_dev_img_scope.ll | 2 +- ...lobal_variable_many_modules_two_vars_ok.ll | 2 +- ...t_global_variable_name_mapping_metadata.ll | 2 +- .../test_global_variable_trim_used.ll | 2 +- ...st_global_variable_trim_used_opaque_ptr.ll | 2 +- ...no_property_set_header_for_an_empty_set.ll | 2 +- .../device-requirements/aspects.ll | 4 +- .../device-requirements/fixed-target.ll | 2 +- .../device-requirements/joint-matrix.ll | 2 +- .../reqd-sub-group-size.ll | 2 +- .../reqd-work-group-size.ll | 2 +- .../sycl-post-link/device-sanitizer/asan.ll | 2 +- .../sycl-post-link/emit_exported_symbols.ll | 6 +- .../sycl-post-link/emit_imported_symbols.ll | 6 +- .../sycl-post-link/emit_program_metadata.ll | 2 +- llvm/test/tools/sycl-post-link/erase_used.ll | 4 +- .../tools/sycl-post-link/erase_used_decl.ll | 2 +- .../sycl-post-link/erase_used_decl_opaque.ll | 2 +- .../tools/sycl-post-link/erase_used_opaque.ll | 4 +- .../exclude_external_functions.ll | 2 +- .../exclude_external_functions_source.ll | 2 +- llvm/test/tools/sycl-post-link/help.test | 7 +- .../multiple-filtered-outputs.ll | 2 +- .../sycl-post-link/no-args-to-eliminate.ll | 2 +- .../sycl-post-link/no-split-unused-func.ll | 2 +- 
.../tools/sycl-post-link/omit_kernel_args.ll | 2 +- .../tools/sycl-post-link/registerallocmode.ll | 2 +- .../sycl-post-link/skip-properties-gen.ll | 25 ++ ...CL-2020-zeroinitializer-array-of-arrays.ll | 4 +- .../SYCL-2020-zeroinitializer.ll | 2 +- .../spec-constants/SYCL-2020.ll | 8 +- .../spec-constants/SYCL-alloca-error.ll | 2 +- .../spec-constants/SYCL-alloca.ll | 4 +- .../SYCL2020-struct-with-undef-padding.ll | 4 +- .../sycl-post-link/spec-constants/bool.ll | 4 +- .../spec-constants/composite-O2.ll | 2 +- .../composite-default-value-padding.ll | 4 +- .../spec-constants/composite-no-sret.ll | 2 +- .../spec-constants/composite-padding-desc.ll | 4 +- .../default-value/SYCL-alloca.ll | 2 +- .../spec-constants/default-value/bool.ll | 4 +- .../default-value/device-image.ll | 4 +- .../spec-constants/default-value/esimd.ll | 4 +- .../default-value/split-by-kernel.ll | 2 +- .../default-value/split-by-source.ll | 2 +- .../default-value/struct-with-padding.ll | 4 +- .../spec-constants/nested-struct.ll | 4 +- .../remove-dead-private-constants.ll | 4 +- .../struct-with-padding-in-the-middle.ll | 4 +- .../struct-with-undef-padding-2.ll | 4 +- .../sycl-esimd/basic-esimd-lower.ll | 8 +- .../sycl-esimd/basic-sycl-esimd-split.ll | 2 +- .../sycl-esimd/invoke-esimd-double.ll | 2 +- .../sycl-esimd/lower-with-no-esimd-entry.ll | 2 +- .../sycl-esimd/nbarriers-metadata.ll | 2 +- .../no-sycl-esimd-split-shared-func.ll | 2 +- .../sycl-esimd/no-sycl-esimd-split.ll | 2 +- .../sycl-post-link/sycl-esimd/optnone.ll | 4 +- .../sycl-esimd/sycl-esimd-split-per-kernel.ll | 2 +- .../sycl-esimd/sycl-esimd-split-per-source.ll | 2 +- .../sycl-esimd/sycl-esimd-split-symbols.ll | 2 +- .../split-and-lower-esimd.ll | 2 +- .../sycl-external-funcs/split-per-kernel.ll | 4 +- .../sycl-external-funcs/split-per-source1.ll | 4 +- .../sycl-external-funcs/split-per-source2.ll | 4 +- .../tools/sycl-post-link/sycl-grf-size.ll | 2 +- .../sycl-opt-level-external-funcs.ll | 2 +- 
.../tools/sycl-post-link/sycl-opt-level.ll | 2 +- .../sycl-post-link/sycl-post-link-test.ll | 2 +- .../tools/sycl-post-link/sym_but_no_split.ll | 2 +- llvm/tools/sycl-post-link/CMakeLists.txt | 3 - llvm/tools/sycl-post-link/sycl-post-link.cpp | 392 ++---------------- .../SYCL-2020-spec-const-ids-order.cpp | 2 +- .../basic_tests/SYCL-2020-spec-constants.cpp | 4 +- .../sycl-kernel-save-user-names.cpp | 2 +- .../check_device_code/esimd/NBarrierAttr.cpp | 2 +- sycl/test/check_device_code/esimd/dpas.cpp | 2 +- .../check_device_code/esimd/fp16_converts.cpp | 2 +- .../esimd/genx_func_attr.cpp | 2 +- sycl/test/check_device_code/esimd/glob.cpp | 2 +- .../check_device_code/esimd/intrins_trans.cpp | 4 +- .../esimd/lower-external-funcs.cpp | 2 +- sycl/test/check_device_code/esimd/lsc.cpp | 4 +- .../esimd/memory_properties_atomic_update.cpp | 4 +- .../memory_properties_copytocopyfrom.cpp | 4 +- .../esimd/memory_properties_gather.cpp | 4 +- .../esimd/memory_properties_load_store.cpp | 4 +- .../esimd/memory_properties_prefetch_2d.cpp | 4 +- .../esimd/memory_properties_scatter.cpp | 4 +- .../esimd/slm_init_specconst_size.cpp | 2 +- .../native_cpu/offload-wrapper.cpp | 2 +- sycl/test/esimd/sycl_half_basic_ops.cpp | 2 +- sycl/test/esimd/sycl_half_math_ops.cpp | 2 +- sycl/test/extensions/private_alloca.cpp | 2 +- .../optional_kernel_features/fp-accuracy.c | 6 +- .../optional_kernel_features/fp-accuracy.cpp | 6 +- 154 files changed, 732 insertions(+), 600 deletions(-) create mode 100644 llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h rename llvm/{tools/sycl-post-link => include/llvm/SYCLLowerIR}/SYCLDeviceLibReqMask.h (100%) rename llvm/{tools/sycl-post-link => include/llvm/SYCLLowerIR}/SYCLKernelParamOptInfo.h (99%) rename llvm/{tools/sycl-post-link => include/llvm/SYCLLowerIR}/SpecConstants.h (100%) rename llvm/{tools/sycl-post-link => include/llvm/SYCLLowerIR}/Support.h (100%) create mode 100644 llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp rename 
llvm/{tools/sycl-post-link => lib/SYCLLowerIR}/SYCLDeviceLibReqMask.cpp (99%) rename llvm/{tools/sycl-post-link => lib/SYCLLowerIR}/SYCLKernelParamOptInfo.cpp (96%) rename llvm/{tools/sycl-post-link => lib/SYCLLowerIR}/SpecConstants.cpp (99%) create mode 100644 llvm/test/tools/sycl-post-link/skip-properties-gen.ll diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 976bc1a9be6cc..9dada7cf351fc 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10726,6 +10726,11 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC, if (TCArgs.hasFlag(options::OPT_fno_sycl_esimd_force_stateless_mem, options::OPT_fsycl_esimd_force_stateless_mem, false)) addArgs(PostLinkArgs, TCArgs, {"-lower-esimd-force-stateless-mem=false"}); + + bool IsUsingLTO = TC.getDriver().isUsingLTO(/*IsDeviceOffloadAction=*/true); + auto LTOMode = TC.getDriver().getLTOMode(/*IsDeviceOffloadAction=*/true); + if (!IsUsingLTO || LTOMode != LTOK_Thin) + addArgs(PostLinkArgs, TCArgs, {"-properties"}); } // Add any sycl-post-link options that rely on a specific Triple in addition @@ -10780,9 +10785,12 @@ static void getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, bool SplitEsimd = TCArgs.hasFlag( options::OPT_fsycl_device_code_split_esimd, options::OPT_fno_sycl_device_code_split_esimd, SplitEsimdByDefault); - // Symbol file and specialization constant info generation is mandatory - + bool IsUsingLTO = TC.getDriver().isUsingLTO(/*IsDeviceOffloadAction=*/true); + auto LTOMode = TC.getDriver().getLTOMode(/*IsDeviceOffloadAction=*/true); + if (!IsUsingLTO || LTOMode != LTOK_Thin) + addArgs(PostLinkArgs, TCArgs, {"-symbols"}); + // Specialization constant info generation is mandatory - // add options unconditionally - addArgs(PostLinkArgs, TCArgs, {"-symbols"}); addArgs(PostLinkArgs, TCArgs, {"-emit-exported-symbols"}); addArgs(PostLinkArgs, TCArgs, {"-emit-imported-symbols"}); if (SplitEsimd) diff --git 
a/clang/test/Driver/sycl-linker-wrapper-image.cpp b/clang/test/Driver/sycl-linker-wrapper-image.cpp index cfbebd7c01a6a..a8be834a19690 100644 --- a/clang/test/Driver/sycl-linker-wrapper-image.cpp +++ b/clang/test/Driver/sycl-linker-wrapper-image.cpp @@ -5,7 +5,7 @@ // RUN: clang-offload-packager -o %t.fat --image=file=%t.device.bc,kind=sycl,triple=spir64-unknown-unknown // RUN: %clang -cc1 %s -triple=x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.fat // RUN: clang-linker-wrapper --print-wrapped-module --host-triple=x86_64-unknown-linux-gnu \ -// RUN: -sycl-device-library-location=%S/Inputs -sycl-post-link-options="-split=auto -symbols" \ +// RUN: -sycl-device-library-location=%S/Inputs -sycl-post-link-options="-split=auto -symbols -properties" \ // RUN: %t.o -o %t.out 2>&1 --linker-path="/usr/bin/ld" | FileCheck %s template diff --git a/clang/test/Driver/sycl-lto.cpp b/clang/test/Driver/sycl-lto.cpp index 1c14219daa342..77c75b348f7fd 100644 --- a/clang/test/Driver/sycl-lto.cpp +++ b/clang/test/Driver/sycl-lto.cpp @@ -12,6 +12,7 @@ // RUN: %clangxx -fsycl --offload-new-driver -foffload-lto=thin %s -### 2>&1 | FileCheck -check-prefix=CHECK_SUPPORTED %s // CHECK_SUPPORTED: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown" {{.*}} "-flto=thin" "-flto-unit" // CHECK_SUPPORTED: sycl-post-link{{.*}} +// CHECK_SUPPORTED-NOT: -properties // CHECK_SUPPORTED-NEXT: file-table-tform{{.*}} // CHECK_SUPPORTED-NEXT: llvm-foreach{{.*}} "--" {{.*}}clang{{.*}} "-fsycl-is-device"{{.*}} "-flto=thin" "-flto-unit" // CHECK_SUPPORTED-NEXT: file-table-tform{{.*}} diff --git a/clang/test/Driver/sycl-offload-intelfpga-emu.cpp b/clang/test/Driver/sycl-offload-intelfpga-emu.cpp index 2a01cf719801e..cabbdd17b2d0b 100644 --- a/clang/test/Driver/sycl-offload-intelfpga-emu.cpp +++ b/clang/test/Driver/sycl-offload-intelfpga-emu.cpp @@ -178,7 +178,7 @@ // CHK-FPGA-AOCX-SRC: clang-offload-wrapper{{.*}} "-o=[[WRAPOUT:.+\.bc]]" {{.*}} "-target=spir64_fpga" "-kind=sycl" 
"--sym-prop-bc-files=[[SYM_AND_PROP]]" "-batch" "[[TABLEOUT]]" // CHK-FPGA-AOCX-SRC: llc{{.*}} "-filetype=obj" "-o" "[[LLCOUT:.+\.(o|obj)]]" "[[WRAPOUT]]" // CHK-FPGA-AOCX-SRC: llvm-link{{.*}} "[[DEVICEBC]]" "-o" "[[LLVMLINKOUT:.+\.bc]]" "--suppress-warnings" -// CHK-FPGA-AOCX-SRC: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=emulation"{{.*}} "-o" "[[POSTLINKOUT:.+\.table]]" "[[LLVMLINKOUT]] +// CHK-FPGA-AOCX-SRC: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=emulation"{{.*}} "-o" "[[POSTLINKOUT:.+\.table]]" "[[LLVMLINKOUT]] // CHK-FPGA-AOCX-SRC: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[POSTLINKOUT]]" // CHK-FPGA-AOCX-SRC: llvm-spirv{{.*}} "-o" "[[LLVMSPVOUT:.+\.txt]]" {{.*}} "[[TABLEOUT]]" // CHK-FPGA-AOCX-SRC: opencl-aot{{.*}} "-device=fpga_fast_emu" "-spv=[[LLVMSPVOUT]]" "-ir=[[OUTPUT4:.+\.aocx]]" "--bo=-g" diff --git a/clang/test/Driver/sycl-offload-intelfpga-link.cpp b/clang/test/Driver/sycl-offload-intelfpga-link.cpp index d816f01b4a6e7..87b4e60de1339 100644 --- a/clang/test/Driver/sycl-offload-intelfpga-link.cpp +++ b/clang/test/Driver/sycl-offload-intelfpga-link.cpp @@ -13,7 +13,7 @@ // CHK-FPGA-LINK-NOT: clang-offload-bundler{{.*}} // CHK-FPGA-LINK: spirv-to-ir-wrapper{{.*}} "[[OUTPUT1]]" "-o" "[[IROUTPUT1:.+\.bc]]" // CHK-FPGA-LINK: llvm-link{{.*}} "[[IROUTPUT1]]" "-o" "[[OUTPUT2_1:.+\.bc]]" -// CHK-FPGA-LINK: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=emulation"{{.*}} "-o" "[[OUTPUT2:.+\.table]]" "[[OUTPUT2_1]]" +// CHK-FPGA-LINK: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=emulation"{{.*}} "-o" "[[OUTPUT2:.+\.table]]" "[[OUTPUT2_1]]" // CHK-FPGA-LINK: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[OUTPUT2]]" // CHK-FPGA-LINK: llvm-spirv{{.*}} "-o" "[[OUTPUT3:.+\.txt]]" "-spirv-max-version={{.*}}"{{.*}} "[[TABLEOUT]]" // CHK-FPGA-EARLY: aoc{{.*}} "-o" "[[OUTPUT4:.+\.aocr]]" "[[OUTPUT3]]" "-sycl" "-rtl" @@ -46,7 +46,7 @@ // CHK-FPGA-LINK-WIN-NOT: 
clang-offload-bundler{{.*}} // CHK-FPGA-LINK-WIN: spirv-to-ir-wrapper{{.*}} "[[OUTPUT1]]" "-o" "[[IROUTPUT1:.+\.bc]]" // CHK-FPGA-LINK-WIN: llvm-link{{.*}} "[[IROUTPUT1]]" "-o" "[[OUTPUT2_1:.+\.bc]]" -// CHK-FPGA-LINK-WIN: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=emulation"{{.*}} "-o" "[[OUTPUT2:.+\.table]]" "[[OUTPUT2_1]]" +// CHK-FPGA-LINK-WIN: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=emulation"{{.*}} "-o" "[[OUTPUT2:.+\.table]]" "[[OUTPUT2_1]]" // CHK-FPGA-LINK-WIN: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[OUTPUT2]]" // CHK-FPGA-LINK-WIN: llvm-spirv{{.*}} "-o" "[[OUTPUT3:.+\.txt]]" "-spirv-max-version={{.*}}"{{.*}} "[[TABLEOUT]]" // CHK-FPGA-LINK-WIN: aoc{{.*}} "-o" "[[OUTPUT5:.+\.aocr]]" "[[OUTPUT3]]" "-sycl" "-rtl" @@ -175,7 +175,7 @@ // CHK-FPGA: clang-offload-bundler{{.*}} "-type=o" "-targets=host-x86_64-unknown-linux-gnu,sycl-spir64_fpga-unknown-unknown" {{.*}} "-output=[[FINALLINK2x:.+\.o]]" "-output=[[OUTPUT1:.+\.o]]" "-unbundle" // CHK-FPGA: spirv-to-ir-wrapper{{.*}} "[[OUTPUT1]]" "-o" "[[IROUTPUT1:.+\.bc]]" // CHK-FPGA: llvm-link{{.*}} "[[IROUTPUT1]]" "-o" "[[OUTPUT2_BC:.+\.bc]]" -// CHK-FPGA: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=emulation"{{.*}} "-o" "[[OUTPUT3_TABLE:.+\.table]]" "[[OUTPUT2_BC]]" +// CHK-FPGA: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=emulation"{{.*}} "-o" "[[OUTPUT3_TABLE:.+\.table]]" "[[OUTPUT2_BC]]" // CHK-FPGA: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[OUTPUT3_TABLE]]" // CHK-FPGA: llvm-spirv{{.*}} "-o" "[[OUTPUT5:.+\.txt]]" "-spirv-max-version={{.*}}"{{.*}} "[[TABLEOUT]]" // CHK-FPGA: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-fpga_dep" {{.*}} "-output=[[DEPFILE:.+\.d]]" "-unbundle" @@ -237,7 +237,7 @@ // CHK-FPGA-AOCX-SRC: clang-offload-wrapper{{.*}} "-o=[[WRAPOUT:.+\.bc]]" {{.*}} "-target=spir64_fpga" "-kind=sycl" "--sym-prop-bc-files=[[SYM_AND_PROP]]" "-batch" "[[TABLEOUT]]" // CHK-FPGA-AOCX-SRC: 
llc{{.*}} "-filetype=obj" "-o" "[[LLCOUT:.+\.(o|obj)]]" "[[WRAPOUT]]" // CHK-FPGA-AOCX-SRC: llvm-link{{.*}} "[[DEVICEBC]]" "-o" "[[LLVMLINKOUT:.+\.bc]]" "--suppress-warnings" -// CHK-FPGA-AOCX-SRC: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=emulation"{{.*}} "-o" "[[POSTLINKOUT:.+\.table]]" "[[LLVMLINKOUT]] +// CHK-FPGA-AOCX-SRC: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=emulation"{{.*}} "-o" "[[POSTLINKOUT:.+\.table]]" "[[LLVMLINKOUT]] // CHK-FPGA-AOCX-SRC: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[POSTLINKOUT]]" // CHK-FPGA-AOCX-SRC: llvm-spirv{{.*}} "-o" "[[LLVMSPVOUT:.+\.txt]]" {{.*}} "[[TABLEOUT]]" // CHK-FPGA-AOCX-SRC: aoc{{.*}} "-o" "[[AOCOUT:.+\.aocx]]" "[[LLVMSPVOUT]]" "-sycl" @@ -263,7 +263,7 @@ // CHK-FPGA-AOCX-OBJ: clang-offload-bundler{{.*}} "-type=o" {{.*}} "-output=[[HOSTOBJx:.+\.(o|obj)]]" "-output=[[DEVICEOBJ:.+\.(o|obj)]]" "-unbundle" // CHK-FPGA-AOCX-OBJ: spirv-to-ir-wrapper{{.*}} "[[DEVICEOBJ]]" "-o" "[[IROUTPUT:.+\.bc]]" // CHK-FPGA-AOCX-OBJ: llvm-link{{.*}} "[[IROUTPUT]]" "-o" "[[LLVMLINKOUT:.+\.bc]]" "--suppress-warnings" -// CHK-FPGA-AOCX-OBJ: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=emulation"{{.*}} "-o" "[[POSTLINKOUT:.+\.table]]" "[[LLVMLINKOUT]] +// CHK-FPGA-AOCX-OBJ: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=emulation"{{.*}} "-o" "[[POSTLINKOUT:.+\.table]]" "[[LLVMLINKOUT]] // CHK-FPGA-AOCX-OBJ: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[POSTLINKOUT]]" // CHK-FPGA-AOCX-OBJ: llvm-spirv{{.*}} "-o" "[[LLVMSPVOUT:.+\.txt]]" {{.*}} "[[TABLEOUT]]" // CHK-FPGA-AOCX-OBJ: aoc{{.*}} "-o" "[[AOCOUT:.+\.aocx]]" "[[LLVMSPVOUT]]" "-sycl" @@ -283,7 +283,7 @@ // CHK-FPGA-AOCX-OBJ2: clang-offload-bundler{{.*}} "-type=o" {{.*}} "-output=[[HOSTOBJx:.+\.(o|obj)]]" "-output=[[DEVICEOBJ:.+\.(o|obj)]]" "-output=[[DEVICEOBJ2:.+\.(o|obj)]]" "-unbundle" // CHK-FPGA-AOCX-OBJ2: spirv-to-ir-wrapper{{.*}} "[[DEVICEOBJ]]" "-o" "[[IROUTPUT:.+\.bc]]" // 
CHK-FPGA-AOCX-OBJ2: llvm-link{{.*}} "[[IROUTPUT]]" "-o" "[[LLVMLINKOUT:.+\.bc]]" "--suppress-warnings" -// CHK-FPGA-AOCX-OBJ2: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=native"{{.*}} "-o" "[[POSTLINKOUT:.+\.table]]" "[[LLVMLINKOUT]]" +// CHK-FPGA-AOCX-OBJ2: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native"{{.*}} "-o" "[[POSTLINKOUT:.+\.table]]" "[[LLVMLINKOUT]]" // CHK-FPGA-AOCX-OBJ2: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[POSTLINKOUT]]" // CHK-FPGA-AOCX-OBJ2: llvm-spirv{{.*}} "-o" "[[LLVMSPVOUT:.+\.txt]]" {{.*}} "[[TABLEOUT]]" // CHK-FPGA-AOCX-OBJ2: file-table-tform{{.*}} "-replace=Code,Code" "-o" "[[TFORM_OUT:.+\.table]]" "[[POSTLINKOUT]]" "[[LLVMSPVOUT]]" @@ -291,7 +291,7 @@ // CHK-FPGA-AOCX-OBJ2: llc{{.*}} "-filetype=obj" "-o" "[[LLCOUT:.+\.(o|obj)]]" "[[WRAPOUT]]" // CHK-FPGA-AOCX-OBJ2: spirv-to-ir-wrapper{{.*}} "[[DEVICEOBJ2]]" "-o" "[[IROUTPUT2:.+\.bc]]" // CHK-FPGA-AOCX-OBJ2: llvm-link{{.*}} "[[IROUTPUT2]]" "-o" "[[LLVMLINKOUT2:.+\.bc]]" "--suppress-warnings" -// CHK-FPGA-AOCX-OBJ2: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=emulation"{{.*}} "-o" "[[POSTLINKOUT2:.+\.table]]" "[[LLVMLINKOUT2]]" +// CHK-FPGA-AOCX-OBJ2: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=emulation"{{.*}} "-o" "[[POSTLINKOUT2:.+\.table]]" "[[LLVMLINKOUT2]]" // CHK-FPGA-AOCX-OBJ2: file-table-tform{{.*}} "-o" "[[TABLEOUT2:.+\.txt]]" "[[POSTLINKOUT2]]" // CHK-FPGA-AOCX-OBJ2: llvm-spirv{{.*}} "-o" "[[LLVMSPVOUT2:.+\.txt]]" {{.*}} "[[TABLEOUT2]]" // CHK-FPGA-AOCX-OBJ2: aoc{{.*}} "-o" "[[AOCOUT:.+\.aocx]]" "[[LLVMSPVOUT2]]" "-sycl" diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index 405bf64f5ccf6..90c4e7437ca64 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -60,7 +60,7 @@ // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ // RUN: 
-Xdevice-post-link -post-link-opt -### %s 2>&1 \ // RUN: | FileCheck -check-prefix WRAPPER_OPTIONS_POSTLINK %s -// WRAPPER_OPTIONS_POSTLINK: clang-linker-wrapper{{.*}} "--sycl-post-link-options=-O2 -device-globals -post-link-opt" +// WRAPPER_OPTIONS_POSTLINK: clang-linker-wrapper{{.*}} "--sycl-post-link-options=-O2 -device-globals -properties -post-link-opt" // -fsycl-device-only behavior // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \ diff --git a/clang/test/Driver/sycl-post-link-options-win.cpp b/clang/test/Driver/sycl-post-link-options-win.cpp index 65a802d1f0210..d68d4881c9404 100644 --- a/clang/test/Driver/sycl-post-link-options-win.cpp +++ b/clang/test/Driver/sycl-post-link-options-win.cpp @@ -3,7 +3,7 @@ // RUN: %clangxx -### --target=x86_64-pc-windows-msvc -fsycl \ // RUN: -Xdevice-post-link -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s -// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" +// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" // RUN: %clang -cc1 %s -triple x86_64-pc-windows-msvc -emit-obj -o %t.elf.o // RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=sycl,triple=spir64 diff --git a/clang/test/Driver/sycl-post-link-options.cpp b/clang/test/Driver/sycl-post-link-options.cpp index 4f81fb424ec7c..55a6b94feb86e 100644 --- a/clang/test/Driver/sycl-post-link-options.cpp +++ b/clang/test/Driver/sycl-post-link-options.cpp @@ -3,7 +3,7 @@ // RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl -### \ // RUN: -Xdevice-post-link -O0 %s 2>&1 \ // 
RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT_OLD %s -// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" +// OPTIONS_POSTLINK_JIT_OLD: sycl-post-link{{.*}} "-O2" "-device-globals" "-properties" "-spec-const=native" "-split=auto" "-emit-only-kernels-as-entry-points" "-emit-param-info" "-symbols" "-emit-exported-symbols" "-emit-imported-symbols" "-split-esimd" "-lower-esimd" "-O0" // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o // RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=sycl,triple=spir64 @@ -11,6 +11,6 @@ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ // RUN: -sycl-device-library-location=%S/Inputs -sycl-device-libraries=libsycl-crt.new.o \ -// RUN: --sycl-post-link-options="-O2 -device-globals -O0" \ +// RUN: --sycl-post-link-options="-O2 -device-globals -properties -O0" \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck --check-prefix OPTIONS_POSTLINK_JIT_NEW %s -// OPTIONS_POSTLINK_JIT_NEW: sycl-post-link{{.*}} -spec-const=native -split=auto -emit-only-kernels-as-entry-points -emit-param-info -symbols -emit-exported-symbols -emit-imported-symbols -split-esimd -lower-esimd -O2 -device-globals -O0 +// OPTIONS_POSTLINK_JIT_NEW: sycl-post-link{{.*}} -spec-const=native -split=auto -emit-only-kernels-as-entry-points -emit-param-info -symbols -emit-exported-symbols -emit-imported-symbols -split-esimd -lower-esimd -O2 -device-globals -properties -O0 diff --git a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h new file mode 100644 index 0000000000000..eaeecb44deb03 --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h @@ -0,0 
+1,45 @@ +//===- ComputeModuleRuntimeInfo.h - compute runtime info for module -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Functions for computing module properties and symbols for SYCL modules. +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/ADT/SetVector.h" +#include "llvm/SYCLLowerIR/ModuleSplitter.h" +#include "llvm/Support/PropertySetIO.h" +#include +namespace llvm { + +class Function; +class Module; + +namespace sycl { + +struct GlobalBinImageProps { + bool EmitKernelParamInfo; + bool EmitProgramMetadata; + bool EmitExportedSymbols; + bool EmitImportedSymbols; + bool EmitDeviceGlobalPropSet; +}; +bool isModuleUsingAsan(const Module &M); +using PropSetRegTy = llvm::util::PropertySetRegistry; +using EntryPointSet = SetVector; + +PropSetRegTy computeModuleProperties(const Module &M, + const EntryPointSet &EntryPoints, + const GlobalBinImageProps &GlobProps, + bool SpecConstsMet, + bool IsSpecConstantDefault); + +std::string computeModuleSymbolTable(const Module &M, + const EntryPointSet &EntryPoints); + +} // namespace sycl +} // namespace llvm diff --git a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h index 8f485ad0c1667..2465bd212b6e0 100644 --- a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h +++ b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h @@ -207,7 +207,7 @@ class ModuleDesc { const SYCLDeviceRequirements &getOrComputeDeviceRequirements() const { if (!Reqs.has_value()) - Reqs = computeDeviceRequirements(*this); + Reqs = computeDeviceRequirements(getModule(), entries()); return *Reqs; } @@ -306,6 +306,7 @@ struct ModuleSplitterSettings { Expected> 
splitSYCLModule(std::unique_ptr M, ModuleSplitterSettings Settings); +bool isESIMDFunction(const Function &F); bool canBeImportedFunction(const Function &F); } // namespace module_split diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h similarity index 100% rename from llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h rename to llvm/include/llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h index 8891f7f550c5f..8f67b115c43f9 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h @@ -8,6 +8,7 @@ #pragma once +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -19,12 +20,10 @@ #include namespace llvm { - +class Function; +class Module; class StringRef; -namespace module_split { -class ModuleDesc; -} namespace util { class PropertyValue; } @@ -54,6 +53,7 @@ struct SYCLDeviceRequirements { }; SYCLDeviceRequirements -computeDeviceRequirements(const module_split::ModuleDesc &M); +computeDeviceRequirements(const Module &M, + const SetVector &EntryPoints); } // namespace llvm diff --git a/llvm/tools/sycl-post-link/SYCLKernelParamOptInfo.h b/llvm/include/llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h similarity index 99% rename from llvm/tools/sycl-post-link/SYCLKernelParamOptInfo.h rename to llvm/include/llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h index 00eeadedc1e00..c8331578a00c0 100644 --- a/llvm/tools/sycl-post-link/SYCLKernelParamOptInfo.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h @@ -12,6 +12,8 @@ // attached to kernel functions in a module. 
//===----------------------------------------------------------------------===// +#pragma once + #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" diff --git a/llvm/tools/sycl-post-link/SpecConstants.h b/llvm/include/llvm/SYCLLowerIR/SpecConstants.h similarity index 100% rename from llvm/tools/sycl-post-link/SpecConstants.h rename to llvm/include/llvm/SYCLLowerIR/SpecConstants.h diff --git a/llvm/tools/sycl-post-link/Support.h b/llvm/include/llvm/SYCLLowerIR/Support.h similarity index 100% rename from llvm/tools/sycl-post-link/Support.h rename to llvm/include/llvm/SYCLLowerIR/Support.h diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 9160029e18c79..6bd5e9eb719b7 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_component_library(LLVMSYCLLowerIR RecordSYCLAspectNames.cpp CleanupSYCLMetadata.cpp CompileTimePropertiesPass.cpp + ComputeModuleRuntimeInfo.cpp DeviceGlobals.cpp ESIMD/LowerESIMDVLoadVStore.cpp ESIMD/LowerESIMDSlmReservation.cpp @@ -56,8 +57,11 @@ add_llvm_component_library(LLVMSYCLLowerIR LowerWGScope.cpp ModuleSplitter.cpp MutatePrintfAddrspace.cpp + SpecConstants.cpp SYCLAddOptLevelAttribute.cpp + SYCLDeviceLibReqMask.cpp SYCLDeviceRequirements.cpp + SYCLKernelParamOptInfo.cpp SYCLPropagateAspectsUsage.cpp SYCLPropagateJointMatrixUsage.cpp SYCLUtils.cpp diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp new file mode 100644 index 0000000000000..643f2605e270f --- /dev/null +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -0,0 +1,379 @@ +//===--- ComputeModuleRuntimeInfo.cpp - compute runtime info for module ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// See comments in the header. +//===----------------------------------------------------------------------===// +#include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" +#include "llvm/SYCLLowerIR/DeviceGlobals.h" +#include "llvm/SYCLLowerIR/HostPipes.h" +#include "llvm/SYCLLowerIR/ModuleSplitter.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" +#include "llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h" +#include "llvm/SYCLLowerIR/SYCLUtils.h" +#include "llvm/SYCLLowerIR/SpecConstants.h" +#include +#include +#ifndef NDEBUG +constexpr int DebugModuleProps = 0; +#endif + +namespace llvm::sycl { +bool isModuleUsingAsan(const Module &M) { + NamedMDNode *MD = M.getNamedMetadata("device.sanitizer"); + if (MD == nullptr) + return false; + assert(MD->getNumOperands() != 0); + auto *MDVal = cast(MD->getOperand(0)->getOperand(0)); + return MDVal->getString() == "asan"; +} + +// This function traverses over reversed call graph by BFS algorithm. +// It means that an edge links some function @func with functions +// which contain call of function @func. It starts from +// @StartingFunction and lifts up until it reaches all reachable functions, +// or it reaches some function containing "referenced-indirectly" attribute. +// If it reaches "referenced-indirectly" attribute then it returns an empty +// Optional. +// Otherwise, it returns an Optional containing a list of reached +// SPIR kernel function's names.
+std::optional> +traverseCGToFindSPIRKernels(const Function *StartingFunction) { + std::queue FunctionsToVisit; + std::unordered_set VisitedFunctions; + FunctionsToVisit.push(StartingFunction); + std::vector KernelNames; + + while (!FunctionsToVisit.empty()) { + const Function *F = FunctionsToVisit.front(); + FunctionsToVisit.pop(); + + auto InsertionResult = VisitedFunctions.insert(F); + // It is possible that we insert some particular function several + // times in FunctionsToVisit queue. + if (!InsertionResult.second) + continue; + + for (const auto *U : F->users()) { + const CallInst *CI = dyn_cast(U); + if (!CI) + continue; + + const Function *ParentF = CI->getFunction(); + + if (VisitedFunctions.count(ParentF)) + continue; + + if (ParentF->hasFnAttribute("referenced-indirectly")) + return {}; + + if (ParentF->getCallingConv() == CallingConv::SPIR_KERNEL) + KernelNames.push_back(ParentF->getName()); + + FunctionsToVisit.push(ParentF); + } + } + + return {std::move(KernelNames)}; +} +std::vector getKernelNamesUsingAssert(const Module &M) { + auto *DevicelibAssertFailFunction = M.getFunction("__devicelib_assert_fail"); + if (!DevicelibAssertFailFunction) + return {}; + + auto TraverseResult = + traverseCGToFindSPIRKernels(DevicelibAssertFailFunction); + + if (TraverseResult.has_value()) + return std::move(*TraverseResult); + + // Here we reached "referenced-indirectly", so we need to find all kernels and + // return them. + std::vector SPIRKernelNames; + for (const Function &F : M) { + if (F.getCallingConv() == CallingConv::SPIR_KERNEL) + SPIRKernelNames.push_back(F.getName()); + } + + return SPIRKernelNames; +} + +// Gets reqd_work_group_size information for function Func.
+std::vector getKernelReqdWorkGroupSizeMetadata(const Function &Func) { + MDNode *ReqdWorkGroupSizeMD = Func.getMetadata("reqd_work_group_size"); + if (!ReqdWorkGroupSizeMD) + return {}; + size_t NumOperands = ReqdWorkGroupSizeMD->getNumOperands(); + assert(NumOperands >= 1 && NumOperands <= 3 && + "reqd_work_group_size does not have between 1 and 3 operands."); + std::vector OutVals; + OutVals.reserve(NumOperands); + for (const MDOperand &MDOp : ReqdWorkGroupSizeMD->operands()) + OutVals.push_back(mdconst::extract(MDOp)->getZExtValue()); + return OutVals; +} +// Gets work_group_num_dim information for function Func, conveniently 0 if +// metadata is not present. +uint32_t getKernelWorkGroupNumDim(const Function &Func) { + MDNode *MaxDimMD = Func.getMetadata("work_group_num_dim"); + if (!MaxDimMD) + return 0; + assert(MaxDimMD->getNumOperands() == 1 && "Malformed node."); + return mdconst::extract(MaxDimMD->getOperand(0))->getZExtValue(); +} + +PropSetRegTy computeModuleProperties(const Module &M, + const EntryPointSet &EntryPoints, + const GlobalBinImageProps &GlobProps, + bool SpecConstsMet, + bool IsSpecConstantDefault) { + + PropSetRegTy PropSet; + { + uint32_t MRMask = getSYCLDeviceLibReqMask(M); + std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; + PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); + } + { + PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, + computeDeviceRequirements(M, EntryPoints).asMap()); + } + if (SpecConstsMet) { + // extract spec constant maps per each module + SpecIDMapTy TmpSpecIDMap; + SpecConstantsPass::collectSpecConstantMetadata(M, TmpSpecIDMap); + PropSet.add(PropSetRegTy::SYCL_SPECIALIZATION_CONSTANTS, TmpSpecIDMap); + + // Add property with the default values of spec constants + std::vector DefaultValues; + SpecConstantsPass::collectSpecConstantDefaultValuesMetadata(M, + DefaultValues); + PropSet.add(PropSetRegTy::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES, "all", + DefaultValues); + } + if
(GlobProps.EmitKernelParamInfo) { + // extract kernel parameter optimization info per module + ModuleAnalysisManager MAM; + // Register required analysis + MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); + // Register the payload analysis + + MAM.registerPass([&] { return SYCLKernelParamOptInfoAnalysis(); }); + SYCLKernelParamOptInfo PInfo = + MAM.getResult(const_cast(M)); + + // convert analysis results into properties and record them + llvm::util::PropertySet &Props = + PropSet[PropSetRegTy::SYCL_KERNEL_PARAM_OPT_INFO]; + + for (const auto &NameInfoPair : PInfo) { + const llvm::BitVector &Bits = NameInfoPair.second; + if (Bits.empty()) + continue; // Nothing to add + + const llvm::ArrayRef Arr = Bits.getData(); + const unsigned char *Data = + reinterpret_cast(Arr.begin()); + llvm::util::PropertyValue::SizeTy DataBitSize = Bits.size(); + Props.insert(std::make_pair( + NameInfoPair.first, llvm::util::PropertyValue(Data, DataBitSize))); + } + } + if (GlobProps.EmitExportedSymbols) { + // extract exported functions if any and save them into property set + for (const auto *F : EntryPoints) { + // TODO FIXME some of SYCL/ESIMD functions maybe marked with __regcall CC, + // so they won't make it into the export list. Should the check be + // F->getCallingConv() != CallingConv::SPIR_KERNEL? + if (F->getCallingConv() == CallingConv::SPIR_FUNC) { + PropSet.add(PropSetRegTy::SYCL_EXPORTED_SYMBOLS, F->getName(), + /*PropVal=*/true); + } + } + } + + if (GlobProps.EmitImportedSymbols) { + // record imported functions in the property set + for (const auto &F : M) { + if ( // A function that can be imported may still be defined in one split + // image. Only add import property if this is not the image where the + // function is defined. + F.isDeclaration() && module_split::canBeImportedFunction(F)) { + + // StripDeadPrototypes is called during module splitting + // cleanup. At this point all function decls should have uses. 
+ assert(!F.use_empty() && "Function F has no uses"); + PropSet.add(PropSetRegTy::SYCL_IMPORTED_SYMBOLS, F.getName(), + /*PropVal=*/true); + } + } + } + + // Metadata names may be composite so we keep them alive until the + // properties have been written. + SmallVector MetadataNames; + + if (GlobProps.EmitProgramMetadata) { + // Add reqd_work_group_size and work_group_num_dim information to + // program metadata. + for (const Function &Func : M.functions()) { + std::vector KernelReqdWorkGroupSize = + getKernelReqdWorkGroupSizeMetadata(Func); + if (!KernelReqdWorkGroupSize.empty()) { + MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size"); + PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), + KernelReqdWorkGroupSize); + } + + uint32_t WorkGroupNumDim = getKernelWorkGroupNumDim(Func); + if (WorkGroupNumDim) { + MetadataNames.push_back(Func.getName().str() + "@work_group_num_dim"); + PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), + WorkGroupNumDim); + } + } + + // Add global_id_mapping information with mapping between device-global + // unique identifiers and the variable's name in the IR. 
+ for (auto &GV : M.globals()) { + if (!isDeviceGlobalVariable(GV)) + continue; + + StringRef GlobalID = getGlobalVariableUniqueId(GV); + MetadataNames.push_back(GlobalID.str() + "@global_id_mapping"); + PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), + GV.getName()); + } + } + bool SeenESIMDFunction = false; + bool SeenSYCLFunction = false; + for (const auto &F : M) { + if (llvm::module_split::isESIMDFunction(F)) + SeenESIMDFunction = true; + else if (utils::isSYCLExternalFunction(&F) && + !F.getName().starts_with("__itt")) + SeenSYCLFunction = true; + } + if (SeenESIMDFunction && !SeenSYCLFunction) + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "isEsimdImage", true); + { + StringRef RegAllocModeAttr = "sycl-register-alloc-mode"; + uint32_t RegAllocModeVal; + + bool HasRegAllocMode = llvm::any_of(EntryPoints, [&](const Function *F) { + if (!F->hasFnAttribute(RegAllocModeAttr)) + return false; + const auto &Attr = F->getFnAttribute(RegAllocModeAttr); + RegAllocModeVal = getAttributeAsInteger(Attr); + return true; + }); + if (HasRegAllocMode) { + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, RegAllocModeAttr, + RegAllocModeVal); + } + } + + { + StringRef GRFSizeAttr = "sycl-grf-size"; + uint32_t GRFSizeVal; + + bool HasGRFSize = llvm::any_of(EntryPoints, [&](const Function *F) { + if (!F->hasFnAttribute(GRFSizeAttr)) + return false; + const auto &Attr = F->getFnAttribute(GRFSizeAttr); + GRFSizeVal = getAttributeAsInteger(Attr); + return true; + }); + if (HasGRFSize) { + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, GRFSizeAttr, GRFSizeVal); + } + } + + // FIXME: Remove 'if' below when possible + // GPU backend has a problem with accepting optimization level options in form + // described by Level Zero specification (-ze-opt-level=1) when 'invoke_simd' + // functionality is involved. 
JIT compilation results in the following error: + // error: VLD: Failed to compile SPIR-V with following error: + // invalid api option: -ze-opt-level=O1 + // -11 (PI_ERROR_BUILD_PROGRAM_FAILURE) + // 'if' below essentially preserves the behavior (presumably mistakenly) + // implemented in intel/llvm#8763: ignore 'optLevel' property for images which + // were produced my merge after ESIMD split + if (!SeenESIMDFunction || !SeenSYCLFunction) { + // Handle sycl-optlevel property + int OptLevel = -1; + for (const Function *F : EntryPoints) { + if (!F->hasFnAttribute(llvm::sycl::utils::ATTR_SYCL_OPTLEVEL)) + continue; + + // getAsInteger returns true on error + if (!F->getFnAttribute(llvm::sycl::utils::ATTR_SYCL_OPTLEVEL) + .getValueAsString() + .getAsInteger(10, OptLevel)) { + // It is expected that device-code split has separated kernels with + // different values of sycl-optlevel attribute. Therefore, it is enough + // to only look at the first function with such attribute to compute + // the property for the whole device image. 
+ break; + } + } + + if (OptLevel != -1) + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "optLevel", OptLevel); + } + { + std::vector FuncNames = getKernelNamesUsingAssert(M); + for (const StringRef &FName : FuncNames) + PropSet.add(PropSetRegTy::SYCL_ASSERT_USED, FName, true); + } + + { + if (isModuleUsingAsan(M)) + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "asanUsed", true); + } + + if (GlobProps.EmitDeviceGlobalPropSet) { + // Extract device global maps per module + auto DevGlobalPropertyMap = collectDeviceGlobalProperties(M); + if (!DevGlobalPropertyMap.empty()) + PropSet.add(PropSetRegTy::SYCL_DEVICE_GLOBALS, DevGlobalPropertyMap); + } + + auto HostPipePropertyMap = collectHostPipeProperties(M); + if (!HostPipePropertyMap.empty()) { + PropSet.add(PropSetRegTy::SYCL_HOST_PIPES, HostPipePropertyMap); + } + + if (IsSpecConstantDefault) + PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "specConstsReplacedWithDefault", + 1); + + return PropSet; +} +std::string computeModuleSymbolTable(const Module &M, + const EntryPointSet &EntryPoints) { + +#ifndef NDEBUG + if (DebugModuleProps > 0) { + llvm::errs() << "ENTRY POINTS saving Sym table {\n"; + for (const auto *F : EntryPoints) { + llvm::errs() << " " << F->getName() << "\n"; + } + llvm::errs() << "}\n"; + } +#endif // NDEBUG + // Concatenate names of the input entry points with "\n". 
+ std::string SymT; + + for (const auto *F : EntryPoints) { + SymT = (Twine(SymT) + Twine(F->getName()) + Twine("\n")).str(); + } + return SymT; +} + +} // namespace llvm::sycl diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index ddac91c28875c..0c024610c8235 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -135,10 +135,6 @@ bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) { return false; } -bool isESIMDFunction(const Function &F) { - return F.getMetadata(ESIMD_MARKER_MD) != nullptr; -} - // Represents "dependency" or "use" graph of global objects (functions and // global variables) in a module. It is used during device code split to // understand which global variables and functions (other than entry points) @@ -444,6 +440,10 @@ class ModuleSplitter : public ModuleSplitterBase { namespace llvm { namespace module_split { +bool isESIMDFunction(const Function &F) { + return F.getMetadata(ESIMD_MARKER_MD) != nullptr; +} + cl::OptionCategory &getModuleSplitCategory() { static cl::OptionCategory ModuleSplitCategory{"Module Split options"}; return ModuleSplitCategory; diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp similarity index 99% rename from llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp rename to llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp index 1554e81751668..5f270baecec1d 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp @@ -14,9 +14,9 @@ // SYCL runtime later. 
//===----------------------------------------------------------------------===// -#include "SYCLDeviceLibReqMask.h" -#include "llvm/TargetParser/Triple.h" +#include "llvm/SYCLLowerIR/SYCLDeviceLibReqMask.h" #include "llvm/IR/Module.h" +#include "llvm/TargetParser/Triple.h" #include #include diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp index 60424c04027fa..cbc6ec2f847d9 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -37,11 +37,12 @@ static llvm::StringRef ExtractStringFromMDNodeOperand(const MDNode *N, } SYCLDeviceRequirements -llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { +llvm::computeDeviceRequirements(const Module &M, + const SetVector &EntryPoints) { SYCLDeviceRequirements Reqs; bool MultipleReqdWGSize = false; // Process all functions in the module - for (const Function &F : MD.getModule()) { + for (const Function &F : M) { if (auto *MDN = F.getMetadata("sycl_used_aspects")) { for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { StringRef AspectName = ""; @@ -98,7 +99,7 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { } // Process just the entry points in the module - for (const Function *F : MD.entries()) { + for (const Function *F : EntryPoints) { if (auto *MDN = F->getMetadata("intel_reqd_sub_group_size")) { // There should only be at most one function with // intel_reqd_sub_group_size metadata when considering the entry diff --git a/llvm/tools/sycl-post-link/SYCLKernelParamOptInfo.cpp b/llvm/lib/SYCLLowerIR/SYCLKernelParamOptInfo.cpp similarity index 96% rename from llvm/tools/sycl-post-link/SYCLKernelParamOptInfo.cpp rename to llvm/lib/SYCLLowerIR/SYCLKernelParamOptInfo.cpp index 5a1c6f4e3e03b..427c4b6c47b5d 100644 --- a/llvm/tools/sycl-post-link/SYCLKernelParamOptInfo.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLKernelParamOptInfo.cpp @@ -6,7 +6,7 @@ // 
//===----------------------------------------------------------------------===// -#include "SYCLKernelParamOptInfo.h" +#include "llvm/SYCLLowerIR/SYCLKernelParamOptInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Casting.h" diff --git a/llvm/tools/sycl-post-link/SpecConstants.cpp b/llvm/lib/SYCLLowerIR/SpecConstants.cpp similarity index 99% rename from llvm/tools/sycl-post-link/SpecConstants.cpp rename to llvm/lib/SYCLLowerIR/SpecConstants.cpp index 3ef8d15d338bd..58f5a0d54b26e 100644 --- a/llvm/tools/sycl-post-link/SpecConstants.cpp +++ b/llvm/lib/SYCLLowerIR/SpecConstants.cpp @@ -8,8 +8,8 @@ // See comments in the header. //===----------------------------------------------------------------------===// -#include "SpecConstants.h" -#include "Support.h" +#include "llvm/SYCLLowerIR/SpecConstants.h" +#include "llvm/SYCLLowerIR/Support.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/StringMap.h" diff --git a/llvm/test/tools/sycl-post-link/assert/indirect-with-split-2.ll b/llvm/test/tools/sycl-post-link/assert/indirect-with-split-2.ll index 65e1420d81356..e7011ae6de141 100644 --- a/llvm/test/tools/sycl-post-link/assert/indirect-with-split-2.ll +++ b/llvm/test/tools/sycl-post-link/assert/indirect-with-split-2.ll @@ -8,7 +8,7 @@ ; __devicelib_assert_fail, then all kernels in the module are conservatively ; marked as using asserts. 
-; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop -check-prefixes=CHECK,CHECK0 \ ; RUN: --implicit-check-not TU1 ; RUN: FileCheck %s -input-file=%t_1.prop -check-prefixes=CHECK,CHECK1 \ diff --git a/llvm/test/tools/sycl-post-link/assert/indirect-with-split.ll b/llvm/test/tools/sycl-post-link/assert/indirect-with-split.ll index e7ba6c43bb240..639dd73359c1b 100644 --- a/llvm/test/tools/sycl-post-link/assert/indirect-with-split.ll +++ b/llvm/test/tools/sycl-post-link/assert/indirect-with-split.ll @@ -6,7 +6,7 @@ ; __devicelib_assert_fail, then all kernels in the module are conservatively ; marked as using asserts. -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes=CHECK,CHECK1 \ ; RUN: --implicit-check-not TU0 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes=CHECK,CHECK0 \ diff --git a/llvm/test/tools/sycl-post-link/assert/property-1.ll b/llvm/test/tools/sycl-post-link/assert/property-1.ll index 81e7c674187be..2df5e11dbbb07 100644 --- a/llvm/test/tools/sycl-post-link/assert/property-1.ll +++ b/llvm/test/tools/sycl-post-link/assert/property-1.ll @@ -2,16 +2,16 @@ ; property - it should include only kernels that call assertions in their call ; graph. 
-; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop --implicit-check-not TheKernel2 ; -; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop --implicit-check-not TheKernel2 ; -; RUN: sycl-post-link -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop --implicit-check-not TheKernel2 ; -; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes=CHECK-K3 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes=CHECK-K1 ; RUN: FileCheck %s -input-file=%t_2.prop --check-prefixes=CHECK-K2 diff --git a/llvm/test/tools/sycl-post-link/assert/property-2.ll b/llvm/test/tools/sycl-post-link/assert/property-2.ll index 6f4bea447ab89..4fafcde79e829 100644 --- a/llvm/test/tools/sycl-post-link/assert/property-2.ll +++ b/llvm/test/tools/sycl-post-link/assert/property-2.ll @@ -2,7 +2,7 @@ ; property - it should include only kernels that call assertions in their call ; graph. 
-; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop -check-prefix=PRESENCE-CHECK ; RUN: FileCheck %s -input-file=%t_0.prop -check-prefix=ABSENCE-CHECK diff --git a/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-1.ll index 0583cfde3af23..bbcde38cbadcb 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-1.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; By default auto mode is equal to source mode ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK diff --git a/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-2.ll index 4ff2095f42bbb..0f7cf05ae86d5 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-2.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; ; This is the same as auto-module-split-1 test with the only difference is that ; @_Z3foov is marked with "referenced-indirectly" attribute. 
diff --git a/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-3.ll index a5c62a5912338..4dfbc4bfbd163 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-3.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-3.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; ; In precense of indirect calls we start matching functions using their ; signatures, i.e. we have an indirect call to i32(i32) function within diff --git a/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-func-ptr.ll b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-func-ptr.ll index 730d9a5cd8efc..d36de8d1d3ff0 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-func-ptr.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-func-ptr.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix=CHECK-SYM0 ; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix=CHECK-SYM1 ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix=CHECK-IR0 diff --git a/llvm/test/tools/sycl-post-link/device-code-split/basic-module-split.ll b/llvm/test/tools/sycl-post-link/device-code-split/basic-module-split.ll index 48d58248d0095..f2898cbea387c 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/basic-module-split.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/basic-module-split.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK ; RUN: 
FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT diff --git a/llvm/test/tools/sycl-post-link/device-code-split/complex-indirect-call-chain.ll b/llvm/test/tools/sycl-post-link/device-code-split/complex-indirect-call-chain.ll index 064471405a58d..d2e2084ca2b34 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/complex-indirect-call-chain.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/complex-indirect-call-chain.ll @@ -1,7 +1,7 @@ ; The idea of the test is to ensure that sycl-post-link can trace through more ; complex call stacks involving several nested indirect calls -; RUN: sycl-post-link -split=auto -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \ ; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \ ; RUN: --implicit-check-not @kernel_B --implicit-check-not @baz @@ -23,7 +23,7 @@ ; RUN: --implicit-check-not @BAZ --implicit-check-not @kernel_B \ ; RUN: --implicit-check-not @kernel_C ; -; RUN: sycl-post-link -split=source -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \ ; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \ ; RUN: --implicit-check-not @kernel_B --implicit-check-not @baz @@ -45,7 +45,7 @@ ; RUN: --implicit-check-not @BAZ --implicit-check-not @kernel_B \ ; RUN: --implicit-check-not @kernel_C ; -; RUN: sycl-post-link -split=kernel -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \ ; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \ ; RUN: --implicit-check-not @kernel_B --implicit-check-not @baz diff --git a/llvm/test/tools/sycl-post-link/device-code-split/one-kernel-per-module.ll 
b/llvm/test/tools/sycl-post-link/device-code-split/one-kernel-per-module.ll index 0197a2edd4a1b..26081964dd2e7 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/one-kernel-per-module.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/one-kernel-per-module.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefixes CHECK-MODULE0,CHECK ; RUN: FileCheck %s -input-file=%t.files_0.sym --check-prefixes CHECK-MODULE0-TXT ; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefixes CHECK-MODULE1,CHECK diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll index 51a2895f4d326..8c64029f5e1c2 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll @@ -7,7 +7,7 @@ ; that use aspects from kernels which doesn't use aspects regardless of device ; code split mode -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ @@ -35,7 +35,7 @@ ; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll 
--check-prefixes CHECK-M1-IR \ @@ -63,7 +63,7 @@ ; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 -; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll index f4d66822b261c..4962b19721790 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll @@ -1,7 +1,7 @@ ; The test is intended to check that sycl-post-link correctly groups kernels ; by unique sets of aspects used in them -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll index 523477a07573b..93dcf91b578ac 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll @@ -1,7 +1,7 @@ ; This test is intended to check that per-aspect device code split works as ; expected with SYCL_EXTERNAL functions -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix 
CHECK-TABLE ; ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-4.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-4.ll index 9655a72e140e6..e2d32bce59c5d 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-4.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-4.ll @@ -1,7 +1,7 @@ ; This test is intended to check that we do not perform per-aspect split if ; it was disabled through one or another sycl-post-link option -; RUN: sycl-post-link -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK-IR ; diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-1.ll index 543a892415fa4..b7d0bcfceeccd 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-1.ll @@ -5,7 +5,7 @@ ; The test is intended to check that sycl-post-link correctly separates kernels ; that use different sycl_joint_matrix metadata -; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \ @@ -25,7 +25,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \ ; RUN: --implicit-check-not Kernel3 -; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S %s -o %t.table ; RUN: FileCheck %s 
-input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \ @@ -45,7 +45,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \ ; RUN: --implicit-check-not Kernel3 -; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K2 \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-2.ll index 6c054fc579659..81fbd2a05df50 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-2.ll @@ -6,7 +6,7 @@ ; The test is intended to check that sycl-post-link correctly separates kernels ; that use different sycl_joint_matrix metadata and kernels without that metadata -; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K2 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K3 \ @@ -26,7 +26,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K3 \ ; RUN: --implicit-check-not Kernel2 -; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K2 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3 ; 
RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K3 \ @@ -46,7 +46,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K3 \ ; RUN: --implicit-check-not Kernel2 -; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K2 \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-3.ll index 9a365d17faedd..64180694fae72 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-3.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-3.ll @@ -1,7 +1,7 @@ ; This test is intended to check that we do not perform per-joint-matrix ; split if it was disabled through one or another sycl-post-link option -; RUN: sycl-post-link -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK-IR ; diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-1.ll index fd64b234b2c6f..132568926dbce 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-1.ll @@ -5,7 +5,7 @@ ; The test is intended to check that sycl-post-link correctly separates kernels ; that use different sycl_joint_matrix_mad metadata -; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S %s -o %t.table ; RUN: 
FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \ @@ -25,7 +25,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \ ; RUN: --implicit-check-not Kernel3 -; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \ @@ -45,7 +45,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \ ; RUN: --implicit-check-not Kernel3 -; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K2 \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-2.ll index 4c4a4bc8a1a6e..ee8e9d1ead30a 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-2.ll @@ -6,7 +6,7 @@ ; The test is intended to check that sycl-post-link correctly separates kernels ; that use different sycl_joint_matrix_mad metadata and kernels without that metadata -; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K2 \ ; RUN: --implicit-check-not Kernel1 
--implicit-check-not Kernel3 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K3 \ @@ -26,7 +26,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K3 \ ; RUN: --implicit-check-not Kernel2 -; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K2 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K3 \ @@ -46,7 +46,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K3 \ ; RUN: --implicit-check-not Kernel2 -; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K2 \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-3.ll index 251c10969c047..e3abc63e8c5ec 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-3.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-3.ll @@ -1,7 +1,7 @@ ; This test is intended to check that we do not perform per-joint-matrix-mad ; split if it was disabled through one or another sycl-post-link option -; RUN: sycl-post-link -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK-IR ; diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-4.ll 
b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-4.ll index fe995542deba1..b94c42e1f4d4f 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-4.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-4.ll @@ -6,7 +6,7 @@ ; The test is intended to check that sycl-post-link correctly separates kernels ; that use different sycl_joint_matrix_mad metadata -; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \ @@ -26,7 +26,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \ ; RUN: --implicit-check-not Kernel3 -; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \ @@ -46,7 +46,7 @@ ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \ ; RUN: --implicit-check-not Kernel3 -; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K2 \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-5.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-5.ll index 25fd2e26f3ca4..0d9631af085d5 100644 --- 
a/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-5.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-joint-matrix-mad-5.ll @@ -7,7 +7,7 @@ ; The test is intended to check that sycl-post-link correctly separates kernels ; that use different sycl_joint_matrix_mad metadata -; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3,CHECK-IR-K5 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 \ ; RUN: --implicit-check-not Kernel4 --implicit-check-not Kernel6 @@ -59,7 +59,7 @@ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 \ ; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel5 --implicit-check-not Kernel6 -; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3,CHECK-IR-K5 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 \ ; RUN: --implicit-check-not Kernel4 --implicit-check-not Kernel6 @@ -111,7 +111,7 @@ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 \ ; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel5 --implicit-check-not Kernel6 -; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K6 \ ; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2 \ ; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel4 --implicit-check-not Kernel5 diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-sub-group-size-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-sub-group-size-split-1.ll index 393943b63db43..ca723e075d2c0 100644 --- 
a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-sub-group-size-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-sub-group-size-split-1.ll @@ -7,7 +7,7 @@ ; that use reqd_sub_group_size attributes from kernels which doesn't use them ; regardless of device code split mode -; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ @@ -35,7 +35,7 @@ ; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 -; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ @@ -63,7 +63,7 @@ ; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 -; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-sub-group-size-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-sub-group-size-split-2.ll index 1efeb364cb2e3..a943450a2a459 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-sub-group-size-split-2.ll +++ 
b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-sub-group-size-split-2.ll @@ -1,7 +1,7 @@ ; The test is intended to check that sycl-post-link correctly groups kernels ; by unique reqd_sub_group_size values used in them -; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll index b156d71b1e3f6..4b105faa24073 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-1.ll @@ -7,7 +7,7 @@ ; that use reqd_work_group_size attributes from kernels which doesn't use them ; regardless of device code split mode -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ @@ -35,7 +35,7 @@ ; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2 -; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ @@ -63,7 +63,7 @@ ; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefixes CHECK-M2-SYMS \ ; RUN: --implicit-check-not 
kernel1 --implicit-check-not kernel2 -; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll index c92ae8dbc9c03..d453ba763ec3c 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-2.ll @@ -1,7 +1,7 @@ ; The test is intended to check that sycl-post-link correctly groups kernels ; by unique reqd_work_group_size values used in them -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-3.ll index ae6d79a041175..905eb6801f041 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-3.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-reqd-wg-size-split-3.ll @@ -1,7 +1,7 @@ ; This test is intended to check that we do not perform per-reqd_work_group_size ; split if it was disabled through one or another sycl-post-link option -; RUN: sycl-post-link -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK-IR ; diff --git 
a/llvm/test/tools/sycl-post-link/device-code-split/split-with-func-ptrs.ll b/llvm/test/tools/sycl-post-link/device-code-split/split-with-func-ptrs.ll index af920f0ab373d..148f8e8545f75 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/split-with-func-ptrs.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/split-with-func-ptrs.ll @@ -3,14 +3,14 @@ ; modules. ; -- Per-source split -; RUN: sycl-post-link -split=source -emit-param-info -symbols -emit-exported-symbols -split-esimd -lower-esimd -O2 -spec-const=native -S < %s -o %tA.table +; RUN: sycl-post-link -properties -split=source -emit-param-info -symbols -emit-exported-symbols -split-esimd -lower-esimd -O2 -spec-const=native -S < %s -o %tA.table ; RUN: FileCheck %s -input-file=%tA_0.ll --check-prefixes CHECK-A0 ; RUN: FileCheck %s -input-file=%tA_1.ll --check-prefixes CHECK-A1 ; -- No split -; RUN: sycl-post-link -emit-param-info -symbols -emit-exported-symbols -split-esimd -lower-esimd -O2 -spec-const=native -S < %s -o %tB.table +; RUN: sycl-post-link -properties -emit-param-info -symbols -emit-exported-symbols -split-esimd -lower-esimd -O2 -spec-const=native -S < %s -o %tB.table ; RUN: FileCheck %s -input-file=%tB_0.ll --check-prefixes CHECK-B0 ; -- Per-kernel split -; RUN: sycl-post-link -split=kernel -emit-param-info -symbols -emit-exported-symbols -split-esimd -lower-esimd -O2 -spec-const=native -S < %s -o %tC.table +; RUN: sycl-post-link -properties -split=kernel -emit-param-info -symbols -emit-exported-symbols -split-esimd -lower-esimd -O2 -spec-const=native -S < %s -o %tC.table ; RUN: FileCheck %s -input-file=%tC_0.ll --check-prefixes CHECK-C0 ; RUN: FileCheck %s -input-file=%tC_1.ll --check-prefixes CHECK-C1 diff --git a/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll b/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll index 82213e4b3beeb..42dd9e980fe82 100644 --- 
a/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll @@ -1,7 +1,7 @@ ; Purpose of this test is to check that sycl-post-link does not treat ; declarations as entry points. -; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-PER-SOURCE-TABLE ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-PER-SOURCE-SYM0 ; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-PER-SOURCE-SYM1 @@ -11,7 +11,7 @@ ; RUN: FileCheck %s -input-file=%t1_0.sym --check-prefix CHECK-PER-SOURCE-SYM0 ; RUN: FileCheck %s -input-file=%t1_1.sym --check-prefix CHECK-PER-SOURCE-SYM1 ; -; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t2.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S < %s -o %t2.table ; RUN: FileCheck %s -input-file=%t2.table --check-prefix CHECK-PER-KERNEL-TABLE ; RUN: FileCheck %s -input-file=%t2_0.sym --check-prefix CHECK-PER-KERNEL-SYM1 ; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefix CHECK-PER-KERNEL-SYM2 diff --git a/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll b/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll index cb9fd1f77cf78..6d932aba577f9 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/vtable.ll @@ -39,7 +39,7 @@ ; variables storing vtable, are also included into the final module, even though ; they are not directly used in a kernel otherwise. 
; -; RUN: sycl-post-link -split=auto -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll ; ; RUN: sycl-module-split -split=auto -S < %s -o %t2 diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable.ll index a97d69ffbc7fe..cd07c948fb91d 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP source_filename = "test_global_variable.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_drop_used.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_drop_used.ll index 986b8b1917cc7..2e2c6127083e1 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_drop_used.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_drop_used.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-IR ; ; Test checks that llvm.compiler.used is removed when all values in it are diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_drop_used_opaque_ptr.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_drop_used_opaque_ptr.ll index 4c786e0df9f11..80905a39b9518 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_drop_used_opaque_ptr.ll +++ 
b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_drop_used_opaque_ptr.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-IR ; ; Test checks that llvm.compiler.used is removed when all values in it are diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_kernels_in_one_module.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_kernels_in_one_module.ll index 03ccc2f5f696b..6ce11b4dcdf52 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_kernels_in_one_module.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_kernels_in_one_module.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals --split=source -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals --split=source -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD1 ; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD0 diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_global.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_global.ll index 3b94a48cb3e90..406bf7edbf2b3 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_global.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_global.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals --split=source -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals --split=source -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD2 ; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD0 ; RUN: FileCheck 
%s -input-file=%t.files_2.ll --check-prefix CHECK-MOD1 diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll index 847ecbc2b102d..9934318e77793 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_no_dev_img_scope.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals --split=source -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals --split=source -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-MOD2 ; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD0 ; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefix CHECK-MOD1 diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_two_vars_ok.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_two_vars_ok.ll index 9afd208726d79..e5d1b352ffca8 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_two_vars_ok.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_many_modules_two_vars_ok.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals --split=source -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals --split=source -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefix CHECK-MOD0 ; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefix CHECK-MOD1 diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_name_mapping_metadata.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_name_mapping_metadata.ll index a7546dafef2c8..f3d8c6e0bff07 100644 --- 
a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_name_mapping_metadata.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_name_mapping_metadata.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals --emit-program-metadata -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals --emit-program-metadata -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP ; This test is intended to check that the global_id_mapping program metadata properties are diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_trim_used.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_trim_used.ll index 6f17f406e5e01..b4d3bf3f557e1 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_trim_used.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_trim_used.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-IR ; ; Test checks that all device_global variables in llvm.compiler.used are removed diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_trim_used_opaque_ptr.ll b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_trim_used_opaque_ptr.ll index 98668ef83369e..771f8ab1bada8 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_trim_used_opaque_ptr.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_global_variable_trim_used_opaque_ptr.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-IR ; ; Test checks that all device_global variables in 
llvm.compiler.used are removed diff --git a/llvm/test/tools/sycl-post-link/device-globals/test_no_property_set_header_for_an_empty_set.ll b/llvm/test/tools/sycl-post-link/device-globals/test_no_property_set_header_for_an_empty_set.ll index 06128c4210544..3d00379d2259b 100644 --- a/llvm/test/tools/sycl-post-link/device-globals/test_no_property_set_header_for_an_empty_set.ll +++ b/llvm/test/tools/sycl-post-link/device-globals/test_no_property_set_header_for_an_empty_set.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --device-globals -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --device-globals -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP ; This test is intended to check that sycl-post-link doesn't add the header for diff --git a/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll b/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll index 98e92650e516f..eb9f253df0b7a 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/aspects.ll @@ -13,10 +13,10 @@ ; }); ; } -; RUN: sycl-post-link -split=auto < %s -o %t.files.table +; RUN: sycl-post-link -properties -split=auto < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP-AUTO-SPLIT -; RUN: sycl-post-link -split=kernel < %s -o %t.files.table +; RUN: sycl-post-link -properties -split=kernel < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-1 ; RUN: FileCheck %s -input-file=%t.files_1.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-0 diff --git a/llvm/test/tools/sycl-post-link/device-requirements/fixed-target.ll b/llvm/test/tools/sycl-post-link/device-requirements/fixed-target.ll index a1b09c7b64a62..c7b7395ad0bd7 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/fixed-target.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/fixed-target.ll @@ 
-1,4 +1,4 @@ -; RUN: sycl-post-link -split=auto < %s -o %t.files.table +; RUN: sycl-post-link -properties -split=auto < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop ; CHECK: [SYCL/device requirements] diff --git a/llvm/test/tools/sycl-post-link/device-requirements/joint-matrix.ll b/llvm/test/tools/sycl-post-link/device-requirements/joint-matrix.ll index a710c0a9233e0..56e47665638f4 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/joint-matrix.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/joint-matrix.ll @@ -21,7 +21,7 @@ ; return 0; ; } -; RUN: sycl-post-link -split=kernel %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-0 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefix CHECK-PROP-KERNEL-SPLIT-1 diff --git a/llvm/test/tools/sycl-post-link/device-requirements/reqd-sub-group-size.ll b/llvm/test/tools/sycl-post-link/device-requirements/reqd-sub-group-size.ll index df4d5682c0623..1f3a51849d1ab 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/reqd-sub-group-size.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/reqd-sub-group-size.ll @@ -22,7 +22,7 @@ ; return 0; ; } -; RUN: sycl-post-link -split=auto %s -o %t.table +; RUN: sycl-post-link -properties -split=auto %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefix CHECK-PROP-AUTO-SPLIT-0 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefix CHECK-PROP-AUTO-SPLIT-1 diff --git a/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll b/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll index 4a440332a3634..da6ffc46e49b8 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll @@ -21,7 +21,7 @@ ; return 0; ; } -; RUN: sycl-post-link -split=auto < %s -o 
%t.table +; RUN: sycl-post-link -properties -split=auto < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefix CHECK-PROP-AUTO-SPLIT-0 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefix CHECK-PROP-AUTO-SPLIT-1 diff --git a/llvm/test/tools/sycl-post-link/device-sanitizer/asan.ll b/llvm/test/tools/sycl-post-link/device-sanitizer/asan.ll index 002b14076dec2..c829f2dca9120 100644 --- a/llvm/test/tools/sycl-post-link/device-sanitizer/asan.ll +++ b/llvm/test/tools/sycl-post-link/device-sanitizer/asan.ll @@ -1,7 +1,7 @@ ; This test checks that the post-link tool properly generates "asanUsed=1" ; in [SYCL/misc properties] -; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.prop ; CHECK: [SYCL/misc properties] ; CHECK: asanUsed=1 diff --git a/llvm/test/tools/sycl-post-link/emit_exported_symbols.ll b/llvm/test/tools/sycl-post-link/emit_exported_symbols.ll index 49a94eefa2575..c6705ea972288 100644 --- a/llvm/test/tools/sycl-post-link/emit_exported_symbols.ll +++ b/llvm/test/tools/sycl-post-link/emit_exported_symbols.ll @@ -1,17 +1,17 @@ ; This test checks that the post-link tool generates list of exported symbols. 
; ; Global scope -; RUN: sycl-post-link -symbols -emit-exported-symbols -S < %s -o %t.global.files.table +; RUN: sycl-post-link -properties -symbols -emit-exported-symbols -S < %s -o %t.global.files.table ; RUN: FileCheck %s -input-file=%t.global.files_0.prop --implicit-check-not="NotExported" --check-prefix=CHECK-GLOBAL-PROP ; ; Per-module split -; RUN: sycl-post-link -symbols -split=source -emit-exported-symbols -S < %s -o %t.per_module.files.table +; RUN: sycl-post-link -properties -symbols -split=source -emit-exported-symbols -S < %s -o %t.per_module.files.table ; RUN: FileCheck %s -input-file=%t.per_module.files_0.prop -implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP ; RUN: FileCheck %s -input-file=%t.per_module.files_1.prop -implicit-check-not="NotExported" --check-prefix=CHECK-PERMODULE-0-PROP ; RUN: FileCheck %s -input-file=%t.per_module.files_2.prop -implicit-check-not="NotExported" --check-prefix=CHECK-PERMODULE-2-PROP ; ; Per-kernel split -; RUN: sycl-post-link -symbols -split=kernel -emit-exported-symbols -S < %s -o %t.per_kernel.files.table +; RUN: sycl-post-link -properties -symbols -split=kernel -emit-exported-symbols -S < %s -o %t.per_kernel.files.table ; RUN: FileCheck %s -input-file=%t.per_kernel.files_0.prop --implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP ; RUN: FileCheck %s -input-file=%t.per_kernel.files_1.prop --implicit-check-not="NotExported" --check-prefix=CHECK-KERNELONLY-PROP ; RUN: FileCheck %s -input-file=%t.per_kernel.files_2.prop --implicit-check-not="NotExported" --check-prefix=CHECK-PERKERNEL-0-PROP diff --git a/llvm/test/tools/sycl-post-link/emit_imported_symbols.ll b/llvm/test/tools/sycl-post-link/emit_imported_symbols.ll index ae824d293b9ea..bade08d34147e 100644 --- a/llvm/test/tools/sycl-post-link/emit_imported_symbols.ll +++ b/llvm/test/tools/sycl-post-link/emit_imported_symbols.ll @@ -6,7 +6,7 @@ ; Test with -split=kernel 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; RUN: sycl-post-link -symbols -emit-imported-symbols -split=kernel -S < %s -o %t_kernel.table +; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=kernel -S < %s -o %t_kernel.table ; RUN: FileCheck %s -input-file=%t_kernel_0.sym --check-prefixes CHECK-KERNEL-SYM-0 ; RUN: FileCheck %s -input-file=%t_kernel_1.sym --check-prefixes CHECK-KERNEL-SYM-1 @@ -41,11 +41,11 @@ ; Test with -split=source ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; RUN: sycl-post-link -symbols -emit-imported-symbols -split=source -S < %s -o %t_source.table +; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=source -S < %s -o %t_source.table ; RUN: FileCheck %s -input-file=%t_source_0.sym --check-prefixes CHECK-SOURCE-SYM-0 ; RUN: FileCheck %s -input-file=%t_source_0.prop --check-prefixes CHECK-SOURCE-IMPORTED-SYM-0 -; RUN: sycl-post-link -symbols -emit-imported-symbols -split=source -S < %s -o %t_source.table -O0 +; RUN: sycl-post-link -properties -symbols -emit-imported-symbols -split=source -S < %s -o %t_source.table -O0 ; RUN: FileCheck %s -input-file=%t_source_0.sym --check-prefixes CHECK-SOURCE-SYM-0 ; RUN: FileCheck %s -input-file=%t_source_0.prop --check-prefixes CHECK-SOURCE-IMPORTED-SYM-0 diff --git a/llvm/test/tools/sycl-post-link/emit_program_metadata.ll b/llvm/test/tools/sycl-post-link/emit_program_metadata.ll index 4dad16cacf80f..80dcdc74a2619 100644 --- a/llvm/test/tools/sycl-post-link/emit_program_metadata.ll +++ b/llvm/test/tools/sycl-post-link/emit_program_metadata.ll @@ -1,6 +1,6 @@ ; This test checks that the post-link tool generates SYCL program metadata. 
; -; RUN: sycl-post-link -emit-program-metadata -device-globals -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -emit-program-metadata -device-globals -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files.table --check-prefixes CHECK-TABLE ; RUN: FileCheck %s -input-file=%t.files_0.prop --match-full-lines --check-prefixes CHECK-PROP diff --git a/llvm/test/tools/sycl-post-link/erase_used.ll b/llvm/test/tools/sycl-post-link/erase_used.ll index c8827bf7c70da..37aa550796f54 100644 --- a/llvm/test/tools/sycl-post-link/erase_used.ll +++ b/llvm/test/tools/sycl-post-link/erase_used.ll @@ -2,11 +2,11 @@ ; the output modules when splitting modules, creating a single row table, ; and outputing IR only ; -; RUN: sycl-post-link -split=kernel -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -split=kernel -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll ; RUN: FileCheck %s -input-file=%t.files_1.ll ; -; RUN: sycl-post-link -S -split=auto -symbols -split-esimd -lower-esimd -O2 -spec-const=emulation < %s -o %t.out.table +; RUN: sycl-post-link -properties -S -split=auto -symbols -split-esimd -lower-esimd -O2 -spec-const=emulation < %s -o %t.out.table ; RUN: FileCheck %s --input-file=%t.out_0.ll ; ; RUN: sycl-post-link -S -split=auto -ir-output-only < %s -o %t.out_ir_only.ll diff --git a/llvm/test/tools/sycl-post-link/erase_used_decl.ll b/llvm/test/tools/sycl-post-link/erase_used_decl.ll index 00e57c40da3b4..60eadd2a03c25 100644 --- a/llvm/test/tools/sycl-post-link/erase_used_decl.ll +++ b/llvm/test/tools/sycl-post-link/erase_used_decl.ll @@ -1,7 +1,7 @@ ; This test checks that the post-link tool doesn't incorrectly remove function ; declarations which are still in use while erasing the "llvm.used" global. 
; -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll ; target triple = "spir64-unknown-unknown" diff --git a/llvm/test/tools/sycl-post-link/erase_used_decl_opaque.ll b/llvm/test/tools/sycl-post-link/erase_used_decl_opaque.ll index e9faef66c3463..4801c9945ba87 100644 --- a/llvm/test/tools/sycl-post-link/erase_used_decl_opaque.ll +++ b/llvm/test/tools/sycl-post-link/erase_used_decl_opaque.ll @@ -1,7 +1,7 @@ ; This test checks that the post-link tool doesn't incorrectly remove function ; declarations which are still in use while erasing the "llvm.used" global. ; -; RUN: sycl-post-link -split=auto -symbols -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -split=auto -symbols -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll ; target triple = "spir64-unknown-unknown" diff --git a/llvm/test/tools/sycl-post-link/erase_used_opaque.ll b/llvm/test/tools/sycl-post-link/erase_used_opaque.ll index 0a562e8f2cf74..9f9158bdff087 100644 --- a/llvm/test/tools/sycl-post-link/erase_used_opaque.ll +++ b/llvm/test/tools/sycl-post-link/erase_used_opaque.ll @@ -2,11 +2,11 @@ ; the output modules when splitting modules, creating a single row table, ; and outputing IR only ; -; RUN: sycl-post-link -split=kernel -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -split=kernel -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll ; RUN: FileCheck %s -input-file=%t.files_1.ll ; -; RUN: sycl-post-link -S -split=auto -symbols -split-esimd -lower-esimd -O2 -spec-const=emulation < %s -o %t.out.table +; RUN: sycl-post-link -properties -S -split=auto -symbols -split-esimd -lower-esimd -O2 -spec-const=emulation < %s -o %t.out.table ; RUN: FileCheck %s --input-file=%t.out_0.ll ; ; RUN: sycl-post-link -S -split=auto -ir-output-only < %s -o %t.out_ir_only.ll diff --git 
a/llvm/test/tools/sycl-post-link/exclude_external_functions.ll b/llvm/test/tools/sycl-post-link/exclude_external_functions.ll index 7beff7f36113e..f9c7a3e58f376 100644 --- a/llvm/test/tools/sycl-post-link/exclude_external_functions.ll +++ b/llvm/test/tools/sycl-post-link/exclude_external_functions.ll @@ -2,7 +2,7 @@ ; dependencies to a function that can be imported do not cause the function ; to be added to a device image. -; RUN: sycl-post-link -symbols -support-dynamic-linking -split=kernel -S < %s -o %t.table +; RUN: sycl-post-link -properties -symbols -support-dynamic-linking -split=kernel -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYM-0 diff --git a/llvm/test/tools/sycl-post-link/exclude_external_functions_source.ll b/llvm/test/tools/sycl-post-link/exclude_external_functions_source.ll index a830ec31023c8..8c4cc75d15d6b 100644 --- a/llvm/test/tools/sycl-post-link/exclude_external_functions_source.ll +++ b/llvm/test/tools/sycl-post-link/exclude_external_functions_source.ll @@ -4,7 +4,7 @@ ; Also ensure that functions in the same source that can be imported do not get split into ; different images. -; RUN: sycl-post-link -symbols -support-dynamic-linking -split=source -S < %s -o %t.table +; RUN: sycl-post-link -properties -symbols -support-dynamic-linking -split=source -S < %s -o %t.table target triple = "spir64-unknown-unknown" diff --git a/llvm/test/tools/sycl-post-link/help.test b/llvm/test/tools/sycl-post-link/help.test index 6d41bc1f4fcac..2a74cd3b5797c 100644 --- a/llvm/test/tools/sycl-post-link/help.test +++ b/llvm/test/tools/sycl-post-link/help.test @@ -1,4 +1,4 @@ -// RUN: sycl-post-link --help | FileCheck %s +// RUN: sycl-post-link -properties --help | FileCheck %s CHECK: OVERVIEW: SYCL post-link device code processing tool. 
CHECK: This is a collection of utilities run on device code's LLVM IR before @@ -17,8 +17,8 @@ CHECK: for the SPIRV translator CHECK: Normally, the tool generates a number of files and "file table" CHECK: file listing all generated files in a table manner. For example, if CHECK: the input file 'example.bc' contains two kernels, then the command -CHECK: $ sycl-post-link --split=kernel --symbols --spec-const=native \ -CHECK: -o example.table example.bc +CHECK: $ sycl-post-link --properties --split=kernel --symbols \ +CHECK: --spec-const=native -o example.table example.bc CHECK: will produce 'example.table' file with the following content: CHECK: [Code|Properties|Symbols] CHECK: example_0.bc|example_0.prop|example_0.sym @@ -47,6 +47,7 @@ CHECK: -f - Enable binary output on terminals CHECK: --ir-output-only - Output single IR file CHECK: -o - Specifies an output file. Multiple output files can be specified. Additionally, a target may be specified alongside an output file, which has the effect that when module splitting is performed, the modules that are in that output table are filtered so those modules are compatible with the target. 
CHECK: --out-dir= - Directory where files listed in the result file table will be output +CHECK: --properties - generate module properties files CHECK: --spec-const= - lower and generate specialization constants information CHECK: =native - lower spec constants to native spirv instructions so that these values could be set at runtime CHECK: =emulation - remove specialization constants and replace it with emulation diff --git a/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll b/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll index 7c2ab6e91b925..f2abd47ecce0c 100644 --- a/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll +++ b/llvm/test/tools/sycl-post-link/multiple-filtered-outputs.ll @@ -3,7 +3,7 @@ ; and that the output tables from inputs with target info have the modules ; that are not compatible with that target filtered out. -; RUN: sycl-post-link %s -symbols -split=auto \ +; RUN: sycl-post-link -properties %s -symbols -split=auto \ ; RUN: -o %t.table \ ; RUN: -o intel_gpu_pvc,%t-pvc.table \ ; RUN: -o intel_gpu_tgllp,%t-tgllp.table \ diff --git a/llvm/test/tools/sycl-post-link/no-args-to-eliminate.ll b/llvm/test/tools/sycl-post-link/no-args-to-eliminate.ll index 2b684f7f09a7e..274c5f0794400 100644 --- a/llvm/test/tools/sycl-post-link/no-args-to-eliminate.ll +++ b/llvm/test/tools/sycl-post-link/no-args-to-eliminate.ll @@ -1,7 +1,7 @@ ; This test ensures that sycl-post-link doesn't crash when kernel parameter ; optimization info metadata is empty ; -; RUN: sycl-post-link -emit-param-info -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -emit-param-info -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop ; ; CHECK: [SYCL/kernel param opt] diff --git a/llvm/test/tools/sycl-post-link/no-split-unused-func.ll b/llvm/test/tools/sycl-post-link/no-split-unused-func.ll index 09ad1d98afa22..fec85d4ad26e0 100644 --- a/llvm/test/tools/sycl-post-link/no-split-unused-func.ll +++ 
b/llvm/test/tools/sycl-post-link/no-split-unused-func.ll @@ -1,7 +1,7 @@ ; This test ensures that sycl-post-link will optimize away ; unused functions that are safe to remove even if there are no ; splits. -; RUN: sycl-post-link -split-esimd -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -split-esimd -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --implicit-check-not=foo ; CHECK: target datalayout diff --git a/llvm/test/tools/sycl-post-link/omit_kernel_args.ll b/llvm/test/tools/sycl-post-link/omit_kernel_args.ll index ef6d2c96b9392..0253e72c3addb 100644 --- a/llvm/test/tools/sycl-post-link/omit_kernel_args.ll +++ b/llvm/test/tools/sycl-post-link/omit_kernel_args.ll @@ -2,7 +2,7 @@ ; optimization info into a property file if the source IR contained ; corresponding metadata. ; -; RUN: sycl-post-link -emit-param-info -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -emit-param-info -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files.table --check-prefixes CHECK-TABLE ; RUN: FileCheck %s -input-file=%t.files_0.prop --match-full-lines --check-prefixes CHECK-PROP diff --git a/llvm/test/tools/sycl-post-link/registerallocmode.ll b/llvm/test/tools/sycl-post-link/registerallocmode.ll index a008d2593dd83..96035b4379c40 100644 --- a/llvm/test/tools/sycl-post-link/registerallocmode.ll +++ b/llvm/test/tools/sycl-post-link/registerallocmode.ll @@ -1,6 +1,6 @@ ; This test checks handling of RegisterAllocMode in SYCL post link -; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; CHECK: [Code|Properties|Symbols] diff --git a/llvm/test/tools/sycl-post-link/skip-properties-gen.ll b/llvm/test/tools/sycl-post-link/skip-properties-gen.ll new file mode 100644 index 0000000000000..6c400b38e6358 --- /dev/null +++ 
b/llvm/test/tools/sycl-post-link/skip-properties-gen.ll @@ -0,0 +1,25 @@ +; This test verifies the behavior of the sycl-post-link tool without the -properties and -symbols options. +; In particular, we verify that the properties and symbols files are not added to the output table. +; +; RUN: sycl-post-link -split=source -S < %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table -check-prefix=CHECK-TABLE + +; CHECK-TABLE: [Code] +; CHECK-TABLE: {{.*}}_0.ll +; CHECK-TABLE: {{.*}}_1.ll + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +; CHECK-FOO: define dso_local spir_func noundef void @foo +define dso_local spir_func noundef void @foo(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 { +entry: +ret void +} + +define dso_local spir_func noundef void @bar(i32 noundef %a, i32 noundef %b) #1 { +entry: +ret void +} +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "sycl-module-id"="test.cpp" "sycl-grf-size"="128" } +attributes #1 = { convergent mustprogress noinline norecurse nounwind "sycl-module-id"="test.cpp" "sycl-grf-size"="256" } diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020-zeroinitializer-array-of-arrays.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020-zeroinitializer-array-of-arrays.ll index c7ab60148e1c5..1098628fa87f4 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020-zeroinitializer-array-of-arrays.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020-zeroinitializer-array-of-arrays.ll @@ -1,7 +1,7 @@ -; RUN: sycl-post-link --spec-const=native -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --spec-const=native -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-IR ; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP -; RUN: %if asserts %{ sycl-post-link 
-debug-only=SpecConst --spec-const=native -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst --spec-const=native -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; ; This test is intended to check that SpecConstantsPass is able to handle the ; situation where specialization constants with complex types such as arrays diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020-zeroinitializer.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020-zeroinitializer.ll index fb8a162d76c6c..21fdc1c835b79 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020-zeroinitializer.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020-zeroinitializer.ll @@ -1,5 +1,5 @@ ; RUN: sycl-post-link --ir-output-only --spec-const=native %s -S -o - | FileCheck %s -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst --spec-const=native %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst --spec-const=native %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; ; This test is intended to check that SpecConstantsPass is able to handle the ; situation where specialization constants have zeroinitializer in LLVM IR diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020.ll index 113479edc448a..9efcf14a421a1 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-2020.ll @@ -1,11 +1,11 @@ -; RUN: sycl-post-link -spec-const=emulation < %s -S -o %t.table +; RUN: sycl-post-link -properties -spec-const=emulation < %s -S -o %t.table ; RUN: FileCheck %s -check-prefixes=CHECK,CHECK-DEF < %t_0.ll ; RUN: FileCheck %s --check-prefixes=CHECK-PROPS,CHECK-PROPS-DEF < %t_0.prop -; RUN: sycl-post-link -spec-const=native < %s -S -o %t.table +; RUN: sycl-post-link 
-properties -spec-const=native < %s -S -o %t.table ; RUN: FileCheck %s -check-prefixes=CHECK,CHECK-RT < %t_0.ll ; RUN: FileCheck %s --check-prefixes=CHECK-PROPS < %t_0.prop -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -spec-const=emulation < %s -S 2>&1 | FileCheck %s --check-prefixes=CHECK-LOG,CHECK-LOG-EMULATION %} -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -spec-const=native < %s -S 2>&1 | FileCheck %s --check-prefixes=CHECK-LOG,CHECK-LOG-NATIVE %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -spec-const=emulation < %s -S 2>&1 | FileCheck %s --check-prefixes=CHECK-LOG,CHECK-LOG-EMULATION %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -spec-const=native < %s -S 2>&1 | FileCheck %s --check-prefixes=CHECK-LOG,CHECK-LOG-NATIVE %} ; This test checks that the post link tool is able to correctly transform ; SYCL 2020 specialization constant intrinsics for different types in a device diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca-error.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca-error.ll index 07f74450d0375..daf0e0126dec5 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca-error.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca-error.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -spec-const=emulation %s 2>&1 | FileCheck %s +; RUN: sycl-post-link -properties -spec-const=emulation %s 2>&1 | FileCheck %s ; This test checks the `-spec-const` pass on SPIR-V targets and emulation mode, ; i.e., on AOT SPIR-V targets. 
In this scenario, 'llvm.sycl.alloca' intrinsics diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll index 2bceb13b43843..f8d8694c62ff0 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll @@ -1,8 +1,8 @@ -; RUN: sycl-post-link -spec-const=native < %s -S -o %t.table +; RUN: sycl-post-link -properties -spec-const=native < %s -S -o %t.table ; RUN: FileCheck %s -check-prefixes=CHECK,CHECK-RT < %t_0.ll ; RUN: FileCheck %s --check-prefixes=CHECK-PROPS < %t_0.prop -; RUN: sycl-post-link -spec-const=emulation < %s -S -o %t.table +; RUN: sycl-post-link -properties -spec-const=emulation < %s -S -o %t.table ; RUN: FileCheck %s -check-prefixes=CHECK,CHECK-EMULATION < %t_0.ll ; This test checks that the post link tool is able to correctly transform diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL2020-struct-with-undef-padding.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL2020-struct-with-undef-padding.ll index 1511c68a7e381..4eb6862b2ca44 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/SYCL2020-struct-with-undef-padding.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL2020-struct-with-undef-padding.ll @@ -1,7 +1,7 @@ -; RUN: sycl-post-link --spec-const=native -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --spec-const=native -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefix CHECK-IR ; RUN: FileCheck %s -input-file=%t.files_0.prop --check-prefix CHECK-PROP -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst --spec-const=native -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst --spec-const=native -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; ; This test is intended to check that SpecConstantsPass is able to handle the ; 
situation where specialization constants with complex types such as structs diff --git a/llvm/test/tools/sycl-post-link/spec-constants/bool.ll b/llvm/test/tools/sycl-post-link/spec-constants/bool.ll index 4d1d63048b60b..7981450a12f00 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/bool.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/bool.ll @@ -2,8 +2,8 @@ ; RUN: FileCheck %s --input-file=%t.ll --implicit-check-not "call i8 bitcast" --check-prefixes=CHECK,CHECK-RT ; RUN: sycl-post-link -spec-const=emulation -S < %s --ir-output-only -o %t.ll ; RUN: FileCheck %s --input-file=%t.ll --check-prefixes=CHECK,CHECK-DEF -; RUN: %if asserts %{sycl-post-link -debug-only=SpecConst -spec-const=native -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-LOG,CHECK-LOG-NATIVE %} -; RUN: %if asserts %{sycl-post-link -debug-only=SpecConst -spec-const=emulation -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-LOG,CHECK-LOG-EMULATION %} +; RUN: %if asserts %{sycl-post-link -properties -debug-only=SpecConst -spec-const=native -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-LOG,CHECK-LOG-NATIVE %} +; RUN: %if asserts %{sycl-post-link -properties -debug-only=SpecConst -spec-const=emulation -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-LOG,CHECK-LOG-EMULATION %} ; CHECK-LABEL: void @kernel_A diff --git a/llvm/test/tools/sycl-post-link/spec-constants/composite-O2.ll b/llvm/test/tools/sycl-post-link/spec-constants/composite-O2.ll index 538385b0401ef..2ef1141cbd740 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/composite-O2.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/composite-O2.ll @@ -1,6 +1,6 @@ ; RUN: sycl-post-link -spec-const=native --ir-output-only < %s -S -o - \ ; RUN: | FileCheck %s --implicit-check-not "call {{.*}} __sycl_getComposite2020SpecConstantValue" -; RUN: %if asserts %{ sycl-post-link -spec-const=native -debug-only=SpecConst < %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link 
-properties -spec-const=native -debug-only=SpecConst < %s -S 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; ; This test is intended to check that sycl-post-link tool is capable of handling ; composite specialization constants by lowering them into a set of SPIR-V diff --git a/llvm/test/tools/sycl-post-link/spec-constants/composite-default-value-padding.ll b/llvm/test/tools/sycl-post-link/spec-constants/composite-default-value-padding.ll index 968e94547a194..bd376a260a719 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/composite-default-value-padding.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/composite-default-value-padding.ll @@ -1,6 +1,6 @@ -; RUN: sycl-post-link -spec-const=emulation < %s -o %t.files.table +; RUN: sycl-post-link -properties -spec-const=emulation < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -spec-const=emulation < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -spec-const=emulation < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; ; This test checks that composite specialization constants with padding gets the ; correct padding in their default values to prevent values being inserted at diff --git a/llvm/test/tools/sycl-post-link/spec-constants/composite-no-sret.ll b/llvm/test/tools/sycl-post-link/spec-constants/composite-no-sret.ll index 87715cb2db45a..a4bb392d7ff67 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/composite-no-sret.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/composite-no-sret.ll @@ -1,6 +1,6 @@ ; RUN: sycl-post-link -spec-const=native --ir-output-only < %s -S -o - \ ; RUN: | FileCheck %s --implicit-check-not "call {{.*}} __sycl_getCompositeSpecConstantValue" --implicit-check-not "call {{.*}} __sycl_getComposite2020SpecConstantValue" -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -spec-const=native < %s 2>&1 
| FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -spec-const=native < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; CHECK: %[[#NS0:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID:]], i32 ; CHECK: %[[#NS1:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID + 1]], i32 42) diff --git a/llvm/test/tools/sycl-post-link/spec-constants/composite-padding-desc.ll b/llvm/test/tools/sycl-post-link/spec-constants/composite-padding-desc.ll index 1064e662eff01..b525e8c5c2aaa 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/composite-padding-desc.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/composite-padding-desc.ll @@ -1,6 +1,6 @@ -; RUN: sycl-post-link -spec-const=native < %s -o %t.files.table +; RUN: sycl-post-link -properties -spec-const=native < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.prop -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -spec-const=native < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -spec-const=native < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; ; This test checks that composite specialization constants with implicit padding ; at the end of the composite type will have an additional padding descriptor at diff --git a/llvm/test/tools/sycl-post-link/spec-constants/default-value/SYCL-alloca.ll b/llvm/test/tools/sycl-post-link/spec-constants/default-value/SYCL-alloca.ll index c1fa304fbf159..30415be30a90c 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/default-value/SYCL-alloca.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/default-value/SYCL-alloca.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=auto -spec-const=native -S -o %t.table %s -generate-device-image-default-spec-consts +; RUN: sycl-post-link -properties -split=auto -spec-const=native -S -o %t.table %s -generate-device-image-default-spec-consts ; RUN: FileCheck %s 
-input-file %t_1.ll --implicit-check-not="SpecConst" ; This test checks that the post link tool is able to correctly transform diff --git a/llvm/test/tools/sycl-post-link/spec-constants/default-value/bool.ll b/llvm/test/tools/sycl-post-link/spec-constants/default-value/bool.ll index 2a8a82f262611..d96cfa1f333f2 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/default-value/bool.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/default-value/bool.ll @@ -1,8 +1,8 @@ ; Test checks handling of bool specialization constant. -; RUN: sycl-post-link -split=auto -spec-const=native -S -o %t.table %s -generate-device-image-default-spec-consts +; RUN: sycl-post-link -properties -split=auto -spec-const=native -S -o %t.table %s -generate-device-image-default-spec-consts ; RUN: FileCheck %s -input-file %t_1.ll --implicit-check-not="SpecConst" -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -split=auto -spec-const=native -S %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -split=auto -spec-const=native -S %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; CHECK: %bool1 = trunc i8 1 to i1 ; CHECK: %frombool = zext i1 %bool1 to i8 diff --git a/llvm/test/tools/sycl-post-link/spec-constants/default-value/device-image.ll b/llvm/test/tools/sycl-post-link/spec-constants/default-value/device-image.ll index 01af2cd741dde..d38e2469b7e2b 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/default-value/device-image.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/default-value/device-image.ll @@ -2,7 +2,7 @@ ; It checks scalar, sret and "return by value" versions of SpecConstant functions. ; Also test checks generated symbols. 
-; RUN: sycl-post-link -split=auto -spec-const=native -symbols -S -o %t.table %s -generate-device-image-default-spec-consts +; RUN: sycl-post-link -properties -split=auto -spec-const=native -symbols -S -o %t.table %s -generate-device-image-default-spec-consts ; RUN: FileCheck %s -input-file %t.table -check-prefix=CHECK-TABLE ; RUN: FileCheck %s -input-file %t_0.prop -check-prefix=CHECK-PROP0 ; RUN: FileCheck %s -input-file %t_1.prop -check-prefix=CHECK-PROP1 @@ -10,7 +10,7 @@ ; RUN: FileCheck %s -input-file %t_1.ll -check-prefix=CHECK-IR1 --implicit-check-not "SpecConstant" ; RUN: FileCheck %s -input-file %t_0.sym -check-prefix=CHECK-SYM0 ; RUN: FileCheck %s -input-file %t_1.sym -check-prefix=CHECK-SYM1 -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -split=auto -spec-const=native -symbols -S %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -split=auto -spec-const=native -symbols -S %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; CHECK-TABLE: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym ; CHECK-TABLE: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym diff --git a/llvm/test/tools/sycl-post-link/spec-constants/default-value/esimd.ll b/llvm/test/tools/sycl-post-link/spec-constants/default-value/esimd.ll index dbd995bd0ca44..5afb3b4aac1e4 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/default-value/esimd.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/default-value/esimd.ll @@ -1,10 +1,10 @@ ; Test checks generation of device image of esimd kernel. 
-; RUN: sycl-post-link -split=auto -split-esimd -lower-esimd -O2 -spec-const=native -o %t.table %s -generate-device-image-default-spec-consts +; RUN: sycl-post-link -properties -split=auto -split-esimd -lower-esimd -O2 -spec-const=native -o %t.table %s -generate-device-image-default-spec-consts ; RUN: FileCheck %s -input-file=%t.table -check-prefix=CHECK-TABLE ; RUN: FileCheck %s -input-file=%t_1.prop -check-prefix=CHECK-PROP ; RUN: FileCheck %s -input-file=%t_esimd_1.prop -check-prefix=CHECK-ESIMD-PROP -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -split=auto -split-esimd -lower-esimd -O2 -spec-const=native %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -split=auto -split-esimd -lower-esimd -O2 -spec-const=native %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; CHECK-TABLE: {{.*}}_esimd_0.bc|{{.*}}_esimd_0.prop ; CHECK-TABLE: {{.*}}_0.bc|{{.*}}_0.prop diff --git a/llvm/test/tools/sycl-post-link/spec-constants/default-value/split-by-kernel.ll b/llvm/test/tools/sycl-post-link/spec-constants/default-value/split-by-kernel.ll index e985687197a68..98d291b342e3c 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/default-value/split-by-kernel.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/default-value/split-by-kernel.ll @@ -1,6 +1,6 @@ ; Test checks generation of device images for splitted kernels. 
-; RUN: sycl-post-link -split=kernel -o %t.table %s -generate-device-image-default-spec-consts +; RUN: sycl-post-link -properties -split=kernel -o %t.table %s -generate-device-image-default-spec-consts ; RUN: cat %t.table | FileCheck %s -check-prefix=CHECK-TABLE ; RUN: cat %t_0.prop | FileCheck %s -check-prefix=CHECK-PROP0 ; RUN: cat %t_1.prop | FileCheck %s -check-prefix=CHECK-PROP1 diff --git a/llvm/test/tools/sycl-post-link/spec-constants/default-value/split-by-source.ll b/llvm/test/tools/sycl-post-link/spec-constants/default-value/split-by-source.ll index ad38aa3ba7489..5cdc2d2aed1d5 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/default-value/split-by-source.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/default-value/split-by-source.ll @@ -1,6 +1,6 @@ ; Test checks generation of device images for splitted kernels by source. -; RUN: sycl-post-link -split=source -o %t.table %s -generate-device-image-default-spec-consts +; RUN: sycl-post-link -properties -split=source -o %t.table %s -generate-device-image-default-spec-consts ; RUN: cat %t.table | FileCheck %s -check-prefix=CHECK-TABLE ; RUN: cat %t_0.prop | FileCheck %s -check-prefix=CHECK-PROP0 ; RUN: cat %t_1.prop | FileCheck %s -check-prefix=CHECK-PROP1 diff --git a/llvm/test/tools/sycl-post-link/spec-constants/default-value/struct-with-padding.ll b/llvm/test/tools/sycl-post-link/spec-constants/default-value/struct-with-padding.ll index 51dc1c557fd7f..8b9f3b72f3294 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/default-value/struct-with-padding.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/default-value/struct-with-padding.ll @@ -1,10 +1,10 @@ ; Test checks that struct with padding is handled correctly. 
-; RUN: sycl-post-link -split=auto -spec-const=native -S -o %t.table %s -generate-device-image-default-spec-consts +; RUN: sycl-post-link -properties -split=auto -spec-const=native -S -o %t.table %s -generate-device-image-default-spec-consts ; RUN: cat %t.table | FileCheck %s -check-prefix=CHECK-TABLE ; RUN: cat %t_1.prop | FileCheck %s -check-prefix=CHECK-PROP1 ; RUN: cat %t_1.ll | FileCheck %s -check-prefix=CHECK-IR1 --implicit-check-not SpecConstant -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -split=auto -spec-const=native -S %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -split=auto -spec-const=native -S %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; CHECK-TABLE: {{.*}}_0.ll|{{.*}}_0.prop ; CHECK-TABLE: {{.*}}_1.ll|{{.*}}_1.prop diff --git a/llvm/test/tools/sycl-post-link/spec-constants/nested-struct.ll b/llvm/test/tools/sycl-post-link/spec-constants/nested-struct.ll index f0bd6b8df6c66..d68bf98094103 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/nested-struct.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/nested-struct.ll @@ -1,9 +1,9 @@ ; This test demonstrates that multiple padding elements can be ; inserted in the spec constant metadata -; RUN: sycl-post-link --spec-const=native -S %s -o %t.table +; RUN: sycl-post-link -properties --spec-const=native -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -spec-const=native < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -spec-const=native < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; CHECK: %[[#SCV1:]] = call i8 @_Z20__spirv_SpecConstantia(i32 [[#SCID1:]], i8 120) ; CHECK: %[[#SCV2:]] = call i8 @_Z20__spirv_SpecConstantia(i32 [[#SCID2:]], i8 121) diff --git 
a/llvm/test/tools/sycl-post-link/spec-constants/remove-dead-private-constants.ll b/llvm/test/tools/sycl-post-link/spec-constants/remove-dead-private-constants.ll index fb3b69228599e..0b96ef7172b1f 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/remove-dead-private-constants.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/remove-dead-private-constants.ll @@ -1,10 +1,10 @@ ; Test checks the content of simple generated device image. ; It checks for removal of unused private constants. -; RUN: sycl-post-link -split=auto -spec-const=native -symbols -S -o %t.table %s -generate-device-image-default-spec-consts +; RUN: sycl-post-link -properties -split=auto -spec-const=native -symbols -S -o %t.table %s -generate-device-image-default-spec-consts ; RUN: FileCheck %s -input-file %t_0.ll -check-prefix=CHECK-IR0 ; RUN: FileCheck %s -input-file %t_1.ll -check-prefix=CHECK-IR1 -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst -split=auto -spec-const=native -symbols -S %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst -split=auto -spec-const=native -symbols -S %s -generate-device-image-default-spec-consts 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; CHECK-IR0-NOT: @__usid_str = private ; CHECK-IR1-NOT: @__usid_str = private diff --git a/llvm/test/tools/sycl-post-link/spec-constants/struct-with-padding-in-the-middle.ll b/llvm/test/tools/sycl-post-link/spec-constants/struct-with-padding-in-the-middle.ll index 57ddbfe23b55f..2dc002140362f 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/struct-with-padding-in-the-middle.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/struct-with-padding-in-the-middle.ll @@ -1,6 +1,6 @@ -; RUN: sycl-post-link --spec-const=native -S %s -o %t.table +; RUN: sycl-post-link -properties --spec-const=native -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll -; RUN: %if asserts %{ 
sycl-post-link -debug-only=SpecConst --spec-const=native -S %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst --spec-const=native -S %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; LLVM IR for this test is produced from the following SYCL code snippet: ; ; #include diff --git a/llvm/test/tools/sycl-post-link/spec-constants/struct-with-undef-padding-2.ll b/llvm/test/tools/sycl-post-link/spec-constants/struct-with-undef-padding-2.ll index 233933d87f6c2..460699ae68ed3 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/struct-with-undef-padding-2.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/struct-with-undef-padding-2.ll @@ -34,9 +34,9 @@ ; a situation, where spec constant default value contains less elements than ; spec constant type, due to padding inserted by a compiler. ; -; RUN: sycl-post-link --spec-const=native -S < %s -o %t.files.table +; RUN: sycl-post-link -properties --spec-const=native -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files_0.ll -; RUN: %if asserts %{ sycl-post-link -debug-only=SpecConst --spec-const=native -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} +; RUN: %if asserts %{ sycl-post-link -properties -debug-only=SpecConst --spec-const=native -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LOG %} ; ; CHECK: %[[#A:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#ID:]], float 0x40091EB860000000) ; CHECK: %[[#B:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID+1]], i32 42) diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/basic-esimd-lower.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/basic-esimd-lower.ll index 157742f6618f4..3773fd1048ba2 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/basic-esimd-lower.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/basic-esimd-lower.ll @@ -3,19 +3,19 @@ ; for ESIMD kernels in any case. 
; No lowering -; RUN: sycl-post-link -split-esimd -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-NO-LOWERING ; Default lowering -; RUN: sycl-post-link -split-esimd -lower-esimd -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O2 ; -O2 lowering -; RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O2 -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O2 ; -O0 lowering -; RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-O0 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/basic-sycl-esimd-split.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/basic-sycl-esimd-split.ll index 68977a001fbf7..25258107ad0e0 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/basic-sycl-esimd-split.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/basic-sycl-esimd-split.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split-esimd -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-SYCL-IR ; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-IR diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/invoke-esimd-double.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/invoke-esimd-double.ll index a614184d2a83c..c7aeb4966fdbe 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/invoke-esimd-double.ll +++ 
b/llvm/test/tools/sycl-post-link/sycl-esimd/invoke-esimd-double.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link --emit-only-kernels-as-entry-points -symbols -split=auto -S < %s -o %t.table +; RUN: sycl-post-link -properties --emit-only-kernels-as-entry-points -symbols -split=auto -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefixes CHECK-TABLE ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS ; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/lower-with-no-esimd-entry.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/lower-with-no-esimd-entry.ll index 63a5ffb1fe626..cdfb4bba20c04 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/lower-with-no-esimd-entry.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/lower-with-no-esimd-entry.ll @@ -1,7 +1,7 @@ ; This test checks to see if ESIMD lowering is performed even without the ; the presence of ESIMD entry points. -; RUN: sycl-post-link -symbols -lower-esimd -split=auto -S < %s -o %t.table +; RUN: sycl-post-link -properties -symbols -lower-esimd -split=auto -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefixes CHECK-TABLE ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS ; RUN: FileCheck %s -input-file=%t_0.ll diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll index 01655f8c5c70b..20ae18777b559 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/nbarriers-metadata.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split-esimd -lower-esimd -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" diff --git 
a/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split-shared-func.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split-shared-func.ll index 50cdcc5dec8e1..3fead6c4029c0 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split-shared-func.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split-shared-func.ll @@ -5,7 +5,7 @@ ; making sure no functions are shared by the callgraphs (currently required by ; IGC) -; RUN: sycl-post-link -lower-esimd -symbols -split=auto -S < %s -o %t.table +; RUN: sycl-post-link -properties -lower-esimd -symbols -split=auto -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefixes CHECK-TABLE ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-HELPERS-SYM-1 ; RUN: FileCheck %s -input-file=%t_esimd_1.sym --check-prefixes CHECK-HELPERS-SYM-2 diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split.ll index 0488297b7fabe..13485a015c679 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/no-sycl-esimd-split.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=source -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-0 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-1 diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/optnone.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/optnone.ll index aa0fb46bc8c03..14e64440ce97e 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/optnone.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/optnone.ll @@ -1,8 +1,8 @@ ; This ensures we remove optnone from ESIMD functions unless they are SIMT or we didn't split ESIMD code out. 
-; RUN: sycl-post-link -split-esimd -lower-esimd -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK,CHECK-ESIMD-SPLIT -; RUN: sycl-post-link -lower-esimd -S < %s -o %t1.table +; RUN: sycl-post-link -properties -lower-esimd -S < %s -o %t1.table ; RUN: FileCheck %s -input-file=%t1_esimd_0.ll --check-prefixes CHECK,CHECK-NO-ESIMD-SPLIT target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-kernel.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-kernel.ll index 4782557b02615..eabed45ec75f0 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-kernel.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-kernel.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split-esimd -split=kernel -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -split=kernel -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-SYCL-IR-0 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-SYCL-IR-1 diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-source.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-source.ll index efc08cbc7191c..610e52b8476ce 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-source.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-per-source.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split-esimd -split=source -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -split=source -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-SYCL-IR-0 ; RUN: FileCheck %s 
-input-file=%t_1.ll --check-prefixes CHECK-SYCL-IR-1 diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-symbols.ll b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-symbols.ll index 4f7d001321077..cf0f42ab9a063 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-symbols.ll +++ b/llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-split-symbols.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split-esimd -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM ; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-SYM diff --git a/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-and-lower-esimd.ll b/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-and-lower-esimd.ll index 44f5d091926b8..55a3a7aad9c3b 100644 --- a/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-and-lower-esimd.ll +++ b/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-and-lower-esimd.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split=auto -split-esimd -lower-esimd -O0 -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=auto -split-esimd -lower-esimd -O0 -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll ; This test checks that unreferenced functions with sycl-module-id diff --git a/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-kernel.ll b/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-kernel.ll index 2f2a24c8cad39..86ec87a808c34 100644 --- a/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-kernel.ll +++ b/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-kernel.ll @@ -1,7 +1,7 @@ ; This test checks handling of unreferenced functions with sycl-module-id ; attribute with splitting in per-kernel mode. 
-; RUN: sycl-post-link -split=kernel -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR0 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYM0 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR1 @@ -9,7 +9,7 @@ ; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-IR2 ; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-SYM2 -; RUN: sycl-post-link -split=kernel -emit-only-kernels-as-entry-points -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=kernel -emit-only-kernels-as-entry-points -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR1 ; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYM1 ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR0 diff --git a/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-source1.ll b/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-source1.ll index 074ef073d25c4..461edbe4aa4c5 100644 --- a/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-source1.ll +++ b/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-source1.ll @@ -1,13 +1,13 @@ ; This test checks handling of unreferenced functions with sycl-module-id ; attribute with splitting in per-source mode. 
-; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR2 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYM2 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR1 ; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYM1 -; RUN: sycl-post-link -split=source -emit-only-kernels-as-entry-points -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -emit-only-kernels-as-entry-points -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYM2 ; RUN: FileCheck %s -input-file=%t.table --check-prefixes CHECK-TABLE ; CHECK-TABLE: [Code|Properties|Symbols] diff --git a/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-source2.ll b/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-source2.ll index ea93eb7536b67..c020fd8d7b0be 100644 --- a/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-source2.ll +++ b/llvm/test/tools/sycl-post-link/sycl-external-funcs/split-per-source2.ll @@ -1,13 +1,13 @@ ; This test checks handling of referenced SYCL_EXTERNAL functions with ; sycl-module-id attribute with splitting in per-source mode. 
-; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR0 ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYM0 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR1 ; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYM1 -; RUN: sycl-post-link -split=source -emit-only-kernels-as-entry-points -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -emit-only-kernels-as-entry-points -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYM0 ; RUN: FileCheck %s -input-file=%t.table --check-prefixes CHECK-TABLE ; CHECK-TABLE: [Code|Properties|Symbols] diff --git a/llvm/test/tools/sycl-post-link/sycl-grf-size.ll b/llvm/test/tools/sycl-post-link/sycl-grf-size.ll index 9573b4bc1a4ab..28bb07e18fae7 100644 --- a/llvm/test/tools/sycl-post-link/sycl-grf-size.ll +++ b/llvm/test/tools/sycl-post-link/sycl-grf-size.ll @@ -1,6 +1,6 @@ ; This test checks handling of sycl-grf-size in SYCL post link -; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='__ESIMD_kernel()' ; RUN: FileCheck %s -input-file=%t_esimd_0.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP diff --git a/llvm/test/tools/sycl-post-link/sycl-opt-level-external-funcs.ll b/llvm/test/tools/sycl-post-link/sycl-opt-level-external-funcs.ll index 053760fa3b86f..0f8ffe0f6644c 100644 --- a/llvm/test/tools/sycl-post-link/sycl-opt-level-external-funcs.ll +++ b/llvm/test/tools/sycl-post-link/sycl-opt-level-external-funcs.ll @@ -9,7 +9,7 @@ ; - module with 'bar' (as entry point) with 
'optLevel' set to 2 (taken from ; 'bar') -; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-OPT-LEVEL-PROP-0 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes CHECK-OPT-LEVEL-PROP-1 diff --git a/llvm/test/tools/sycl-post-link/sycl-opt-level.ll b/llvm/test/tools/sycl-post-link/sycl-opt-level.ll index 8967d3bc02621..071c76193f094 100644 --- a/llvm/test/tools/sycl-post-link/sycl-opt-level.ll +++ b/llvm/test/tools/sycl-post-link/sycl-opt-level.ll @@ -4,7 +4,7 @@ ; on their optimization levels. ; sycl-post-link adds 'optLevel' property to the device binary -; RUN: sycl-post-link -split=source -symbols -S < %s -o %t.table +; RUN: sycl-post-link -properties -split=source -symbols -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-OPT-LEVEL-PROP-0 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes CHECK-OPT-LEVEL-PROP-1 diff --git a/llvm/test/tools/sycl-post-link/sycl-post-link-test.ll b/llvm/test/tools/sycl-post-link/sycl-post-link-test.ll index cfd6b2c644419..269276c7c0ff4 100644 --- a/llvm/test/tools/sycl-post-link/sycl-post-link-test.ll +++ b/llvm/test/tools/sycl-post-link/sycl-post-link-test.ll @@ -1,4 +1,4 @@ -; RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S < %s -o %t.table +; RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t_esimd_0.ll ; This test checks that IR code below can be successfully processed by ; sycl-post-link. 
In this IR no extractelement instruction and no casting are used diff --git a/llvm/test/tools/sycl-post-link/sym_but_no_split.ll b/llvm/test/tools/sycl-post-link/sym_but_no_split.ll index ca624801f1832..7cbcda34f996d 100644 --- a/llvm/test/tools/sycl-post-link/sym_but_no_split.ll +++ b/llvm/test/tools/sycl-post-link/sym_but_no_split.ll @@ -2,7 +2,7 @@ ; table and a symbol file for an input module with two kernels when no code ; splitting is requested. ; -; RUN: sycl-post-link -symbols -spec-const=native -S < %s -o %t.files.table +; RUN: sycl-post-link -properties -symbols -spec-const=native -S < %s -o %t.files.table ; RUN: FileCheck %s -input-file=%t.files.table --check-prefixes CHECK-TABLE ; RUN: FileCheck %s -input-file=%t.files_0.sym --match-full-lines --check-prefixes CHECK-SYM diff --git a/llvm/tools/sycl-post-link/CMakeLists.txt b/llvm/tools/sycl-post-link/CMakeLists.txt index cfb9b1a27560f..2a8f99795d599 100644 --- a/llvm/tools/sycl-post-link/CMakeLists.txt +++ b/llvm/tools/sycl-post-link/CMakeLists.txt @@ -24,9 +24,6 @@ include_directories( add_llvm_tool(sycl-post-link sycl-post-link.cpp - SpecConstants.cpp - SYCLDeviceLibReqMask.cpp - SYCLKernelParamOptInfo.cpp ADDITIONAL_HEADER_DIRS ${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include ${LLVMGenXIntrinsics_BINARY_DIR}/GenXIntrinsics/include diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index d2c29d10aea7f..252f070652c4c 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -13,11 +13,6 @@ // - specialization constant intrinsic transformation //===----------------------------------------------------------------------===// -#include "SYCLDeviceLibReqMask.h" -#include "SYCLKernelParamOptInfo.h" -#include "SpecConstants.h" -#include "Support.h" - #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" @@ -35,6 +30,7 @@ #include 
"llvm/Linker/Linker.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" +#include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h" #include "llvm/SYCLLowerIR/DeviceConfigFile.hpp" #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h" @@ -42,9 +38,10 @@ #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" #include "llvm/SYCLLowerIR/ModuleSplitter.h" -#include "llvm/SYCLLowerIR/SYCLDeviceRequirements.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/SYCLLowerIR/SanitizeDeviceGlobal.h" +#include "llvm/SYCLLowerIR/SpecConstants.h" +#include "llvm/SYCLLowerIR/Support.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" @@ -64,15 +61,13 @@ #include "llvm/Transforms/Utils/GlobalStatus.h" #include -#include #include -#include #include -#include #include #include using namespace llvm; +using namespace llvm::sycl; using string_vector = std::vector; @@ -201,6 +196,10 @@ cl::opt SplitMode( cl::opt DoSymGen{"symbols", cl::desc("generate exported symbol files"), cl::cat(PostLinkCat)}; +cl::opt DoPropGen{"properties", + cl::desc("generate module properties files"), + cl::cat(PostLinkCat)}; + enum SpecConstLowerMode { SC_NATIVE_MODE, SC_EMULATION_MODE }; cl::opt SpecConstLower{ @@ -251,14 +250,6 @@ cl::opt GenerateDeviceImageWithDefaultSpecConsts{ "replaced with default values from specialization id(s)."), cl::cat(PostLinkCat)}; -struct GlobalBinImageProps { - bool EmitKernelParamInfo; - bool EmitProgramMetadata; - bool EmitExportedSymbols; - bool EmitImportedSymbols; - bool EmitDeviceGlobalPropSet; -}; - struct IrPropSymFilenameTriple { std::string Ir; std::string Prop; @@ -273,111 +264,6 @@ void writeToFile(const std::string &Filename, const std::string &Content) { OS.close(); } -// This function traverses over reversed call graph by BFS algorithm. 
-// It means that an edge links some function @func with functions -// which contain call of function @func. It starts from -// @StartingFunction and lifts up until it reach all reachable functions, -// or it reaches some function containing "referenced-indirectly" attribute. -// If it reaches "referenced-indirectly" attribute than it returns an empty -// Optional. -// Otherwise, it returns an Optional containing a list of reached -// SPIR kernel function's names. -std::optional> -traverseCGToFindSPIRKernels(const Function *StartingFunction) { - std::queue FunctionsToVisit; - std::unordered_set VisitedFunctions; - FunctionsToVisit.push(StartingFunction); - std::vector KernelNames; - - while (!FunctionsToVisit.empty()) { - const Function *F = FunctionsToVisit.front(); - FunctionsToVisit.pop(); - - auto InsertionResult = VisitedFunctions.insert(F); - // It is possible that we insert some particular function several - // times in functionsToVisit queue. - if (!InsertionResult.second) - continue; - - for (const auto *U : F->users()) { - const CallInst *CI = dyn_cast(U); - if (!CI) - continue; - - const Function *ParentF = CI->getFunction(); - - if (VisitedFunctions.count(ParentF)) - continue; - - if (ParentF->hasFnAttribute("referenced-indirectly")) - return {}; - - if (ParentF->getCallingConv() == CallingConv::SPIR_KERNEL) - KernelNames.push_back(ParentF->getName()); - - FunctionsToVisit.push(ParentF); - } - } - - return {std::move(KernelNames)}; -} - -std::vector getKernelNamesUsingAssert(const Module &M) { - auto *DevicelibAssertFailFunction = M.getFunction("__devicelib_assert_fail"); - if (!DevicelibAssertFailFunction) - return {}; - - auto TraverseResult = - traverseCGToFindSPIRKernels(DevicelibAssertFailFunction); - - if (TraverseResult.has_value()) - return std::move(*TraverseResult); - - // Here we reached "referenced-indirectly", so we need to find all kernels and - // return them. 
- std::vector SPIRKernelNames; - for (const Function &F : M) { - if (F.getCallingConv() == CallingConv::SPIR_KERNEL) - SPIRKernelNames.push_back(F.getName()); - } - - return SPIRKernelNames; -} - -bool isModuleUsingAsan(const Module &M) { - NamedMDNode *MD = M.getNamedMetadata("device.sanitizer"); - if (MD == nullptr) - return false; - assert(MD->getNumOperands() != 0); - auto *MDVal = cast(MD->getOperand(0)->getOperand(0)); - return MDVal->getString() == "asan"; -} - -// Gets work_group_num_dim information for function Func, conviniently 0 if -// metadata is not present. -uint32_t getKernelWorkGroupNumDim(const Function &Func) { - MDNode *MaxDimMD = Func.getMetadata("work_group_num_dim"); - if (!MaxDimMD) - return 0; - assert(MaxDimMD->getNumOperands() == 1 && "Malformed node."); - return mdconst::extract(MaxDimMD->getOperand(0))->getZExtValue(); -} - -// Gets reqd_work_group_size information for function Func. -std::vector getKernelReqdWorkGroupSizeMetadata(const Function &Func) { - MDNode *ReqdWorkGroupSizeMD = Func.getMetadata("reqd_work_group_size"); - if (!ReqdWorkGroupSizeMD) - return {}; - size_t NumOperands = ReqdWorkGroupSizeMD->getNumOperands(); - assert(NumOperands >= 1 && NumOperands <= 3 && - "reqd_work_group_size does not have between 1 and 3 operands."); - std::vector OutVals; - OutVals.reserve(NumOperands); - for (const MDOperand &MDOp : ReqdWorkGroupSizeMD->operands()) - OutVals.push_back(mdconst::extract(MDOp)->getZExtValue()); - return OutVals; -} - // Creates a filename based on current output filename, given extension, // sequential ID and suffix. 
std::string makeResultFileName(Twine Ext, int I, StringRef Suffix) { @@ -420,221 +306,9 @@ std::string saveModuleIR(Module &M, int I, StringRef Suff) { std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, StringRef Suff) { - using PropSetRegTy = llvm::util::PropertySetRegistry; - PropSetRegTy PropSet; - Module &M = MD.getModule(); - { - uint32_t MRMask = getSYCLDeviceLibReqMask(M); - std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; - PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); - } - { - PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, - MD.getOrComputeDeviceRequirements().asMap()); - } - if (MD.Props.SpecConstsMet) { - // extract spec constant maps per each module - SpecIDMapTy TmpSpecIDMap; - SpecConstantsPass::collectSpecConstantMetadata(M, TmpSpecIDMap); - PropSet.add(PropSetRegTy::SYCL_SPECIALIZATION_CONSTANTS, TmpSpecIDMap); - - // Add property with the default values of spec constants - std::vector DefaultValues; - SpecConstantsPass::collectSpecConstantDefaultValuesMetadata(M, - DefaultValues); - PropSet.add(PropSetRegTy::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES, "all", - DefaultValues); - } - if (GlobProps.EmitKernelParamInfo) { - // extract kernel parameter optimization info per module - ModuleAnalysisManager MAM; - // Register required analysis - MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); - // Register the payload analysis - - MAM.registerPass([&] { return SYCLKernelParamOptInfoAnalysis(); }); - SYCLKernelParamOptInfo PInfo = - MAM.getResult(M); - - // convert analysis results into properties and record them - llvm::util::PropertySet &Props = - PropSet[PropSetRegTy::SYCL_KERNEL_PARAM_OPT_INFO]; - - for (const auto &NameInfoPair : PInfo) { - const llvm::BitVector &Bits = NameInfoPair.second; - if (Bits.empty()) - continue; // Nothing to add - - const llvm::ArrayRef Arr = Bits.getData(); - const unsigned char *Data = - reinterpret_cast(Arr.begin()); - 
llvm::util::PropertyValue::SizeTy DataBitSize = Bits.size(); - Props.insert(std::make_pair( - NameInfoPair.first, llvm::util::PropertyValue(Data, DataBitSize))); - } - } - if (GlobProps.EmitExportedSymbols) { - // extract exported functions if any and save them into property set - for (const auto *F : MD.entries()) { - // TODO FIXME some of SYCL/ESIMD functions maybe marked with __regcall CC, - // so they won't make it into the export list. Should the check be - // F->getCallingConv() != CallingConv::SPIR_KERNEL? - if (F->getCallingConv() == CallingConv::SPIR_FUNC) { - PropSet.add(PropSetRegTy::SYCL_EXPORTED_SYMBOLS, F->getName(), - /*PropVal=*/true); - } - } - } - - if (GlobProps.EmitImportedSymbols) { - // record imported functions in the property set - for (const auto &F : M) { - if ( // A function that can be imported may still be defined in one split - // image. Only add import property if this is not the image where the - // function is defined. - F.isDeclaration() && module_split::canBeImportedFunction(F)) { - - // StripDeadPrototypes is called during module splitting - // cleanup. At this point all function decls should have uses. - assert(!F.use_empty() && "Function F has no uses"); - - PropSet.add(PropSetRegTy::SYCL_IMPORTED_SYMBOLS, F.getName(), - /*PropVal=*/true); - } - } - } - - // Metadata names may be composite so we keep them alive until the - // properties have been written. - SmallVector MetadataNames; - - if (GlobProps.EmitProgramMetadata) { - // Add reqd_work_group_size and work_group_num_dim information to - // program metadata. 
- for (const Function &Func : M.functions()) { - std::vector KernelReqdWorkGroupSize = - getKernelReqdWorkGroupSizeMetadata(Func); - if (!KernelReqdWorkGroupSize.empty()) { - MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size"); - PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), - KernelReqdWorkGroupSize); - } - - uint32_t WorkGroupNumDim = getKernelWorkGroupNumDim(Func); - if (WorkGroupNumDim) { - MetadataNames.push_back(Func.getName().str() + "@work_group_num_dim"); - PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), - WorkGroupNumDim); - } - } - - // Add global_id_mapping information with mapping between device-global - // unique identifiers and the variable's name in the IR. - for (auto &GV : M.globals()) { - if (!isDeviceGlobalVariable(GV)) - continue; - - StringRef GlobalID = getGlobalVariableUniqueId(GV); - MetadataNames.push_back(GlobalID.str() + "@global_id_mapping"); - PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), - GV.getName()); - } - } - if (MD.isESIMD()) { - PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "isEsimdImage", true); - } - { - StringRef RegAllocModeAttr = "sycl-register-alloc-mode"; - uint32_t RegAllocModeVal; - - bool HasRegAllocMode = llvm::any_of(MD.entries(), [&](const Function *F) { - if (!F->hasFnAttribute(RegAllocModeAttr)) - return false; - const auto &Attr = F->getFnAttribute(RegAllocModeAttr); - RegAllocModeVal = getAttributeAsInteger(Attr); - return true; - }); - if (HasRegAllocMode) { - PropSet.add(PropSetRegTy::SYCL_MISC_PROP, RegAllocModeAttr, - RegAllocModeVal); - } - } - - { - StringRef GRFSizeAttr = "sycl-grf-size"; - uint32_t GRFSizeVal; - - bool HasGRFSize = llvm::any_of(MD.entries(), [&](const Function *F) { - if (!F->hasFnAttribute(GRFSizeAttr)) - return false; - const auto &Attr = F->getFnAttribute(GRFSizeAttr); - GRFSizeVal = getAttributeAsInteger(Attr); - return true; - }); - if (HasGRFSize) { - PropSet.add(PropSetRegTy::SYCL_MISC_PROP, 
GRFSizeAttr, GRFSizeVal); - } - } - - // FIXME: Remove 'if' below when possible - // GPU backend has a problem with accepting optimization level options in form - // described by Level Zero specification (-ze-opt-level=1) when 'invoke_simd' - // functionality is involved. JIT compilation results in the following error: - // error: VLD: Failed to compile SPIR-V with following error: - // invalid api option: -ze-opt-level=O1 - // -11 (PI_ERROR_BUILD_PROGRAM_FAILURE) - // 'if' below essentially preserves the behavior (presumably mistakenly) - // implemented in intel/llvm#8763: ignore 'optLevel' property for images which - // were produced my merge after ESIMD split - if (MD.getEntryPointGroup().Props.HasESIMD != - module_split::SyclEsimdSplitStatus::SYCL_AND_ESIMD) { - // Handle sycl-optlevel property - int OptLevel = -1; - for (const Function *F : MD.entries()) { - if (!F->hasFnAttribute(llvm::sycl::utils::ATTR_SYCL_OPTLEVEL)) - continue; - - // getAsInteger returns true on error - if (!F->getFnAttribute(llvm::sycl::utils::ATTR_SYCL_OPTLEVEL) - .getValueAsString() - .getAsInteger(10, OptLevel)) { - // It is expected that device-code split has separated kernels with - // different values of sycl-optlevel attribute. Therefore, it is enough - // to only look at the first function with such attribute to compute - // the property for the whole device image. 
- break; - } - } - - if (OptLevel != -1) - PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "optLevel", OptLevel); - } - { - std::vector FuncNames = getKernelNamesUsingAssert(M); - for (const StringRef &FName : FuncNames) - PropSet.add(PropSetRegTy::SYCL_ASSERT_USED, FName, true); - } - - { - if (isModuleUsingAsan(M)) - PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "asanUsed", true); - } - - if (GlobProps.EmitDeviceGlobalPropSet) { - // Extract device global maps per module - auto DevGlobalPropertyMap = collectDeviceGlobalProperties(M); - if (!DevGlobalPropertyMap.empty()) - PropSet.add(PropSetRegTy::SYCL_DEVICE_GLOBALS, DevGlobalPropertyMap); - } - - auto HostPipePropertyMap = collectHostPipeProperties(M); - if (!HostPipePropertyMap.empty()) { - PropSet.add(PropSetRegTy::SYCL_HOST_PIPES, HostPipePropertyMap); - } - - if (MD.isSpecConstantDefault()) - PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "specConstsReplacedWithDefault", - 1); + const auto &PropSet = computeModuleProperties( + MD.getModule(), MD.entries(), GlobProps, MD.Props.SpecConstsMet, + MD.isSpecConstantDefault()); std::error_code EC; std::string SCFile = makeResultFileName(".prop", I, Suff); @@ -646,24 +320,9 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, } // Saves specified collection of symbols to a file. -std::string saveModuleSymbolTable(const module_split::EntryPointSet &Es, int I, +std::string saveModuleSymbolTable(const module_split::ModuleDesc &MD, int I, StringRef Suffix) { -#ifndef NDEBUG - if (DebugPostLink > 0) { - llvm::errs() << "ENTRY POINTS saving Sym table {\n"; - for (const auto *F : Es) { - llvm::errs() << " " << F->getName() << "\n"; - } - llvm::errs() << "}\n"; - } -#endif // NDEBUG - // Concatenate names of the input entry points with "\n". - std::string SymT; - - for (const auto *F : Es) { - SymT = (Twine(SymT) + Twine(F->getName()) + Twine("\n")).str(); - } - // Save to file. 
+ auto SymT = computeModuleSymbolTable(MD.getModule(), MD.entries()); std::string OutFileName = makeResultFileName(".sym", I, Suffix); writeToFile(OutFileName, SymT); return OutFileName; @@ -758,11 +417,12 @@ IrPropSymFilenameTriple saveModule(module_split::ModuleDesc &MD, int I, GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, EmitExportedSymbols, EmitImportedSymbols, DeviceGlobals}; - Res.Prop = saveModuleProperties(MD, Props, I, Suffix); + if (DoPropGen) + Res.Prop = saveModuleProperties(MD, Props, I, Suffix); if (DoSymGen) { // save the names of the entry points - the symbol table - Res.Sym = saveModuleSymbolTable(MD.entries(), I, Suffix); + Res.Sym = saveModuleSymbolTable(MD, I, Suffix); } return Res; } @@ -1061,11 +721,14 @@ std::vector> processInputModule(std::unique_ptr M) { // Construct the resulting table which will accumulate all the outputs. SmallVector ColumnTitles{ - StringRef(COL_CODE), StringRef(COL_PROPS)}; + StringRef(COL_CODE)}; - if (DoSymGen) { + if (DoPropGen) + ColumnTitles.push_back(COL_PROPS); + + if (DoSymGen) ColumnTitles.push_back(COL_SYM); - } + Expected> TableE = util::SimpleTable::create(ColumnTitles); CHECK_AND_EXIT(TableE.takeError()); @@ -1255,8 +918,8 @@ int main(int argc, char **argv) { "Normally, the tool generates a number of files and \"file table\"\n" "file listing all generated files in a table manner. 
For example, if\n" "the input file 'example.bc' contains two kernels, then the command\n" - " $ sycl-post-link --split=kernel --symbols --spec-const=native \\\n" - " -o example.table example.bc\n" + " $ sycl-post-link --properties --split=kernel --symbols \\\n" + " --spec-const=native -o example.table example.bc\n" "will produce 'example.table' file with the following content:\n" " [Code|Properties|Symbols]\n" " example_0.bc|example_0.prop|example_0.sym\n" @@ -1282,7 +945,7 @@ int main(int argc, char **argv) { bool DoGenerateDeviceImageWithDefaulValues = GenerateDeviceImageWithDefaultSpecConsts.getNumOccurrences() > 0; - if (!DoSplit && !DoSpecConst && !DoSymGen && !DoParamInfo && + if (!DoSplit && !DoSpecConst && !DoSymGen && !DoPropGen && !DoParamInfo && !DoProgMetadata && !DoSplitEsimd && !DoExportedSyms && !DoImportedSyms && !DoDeviceGlobals && !DoLowerEsimd) { errs() << "no actions specified; try --help for usage info\n"; @@ -1303,6 +966,11 @@ int main(int argc, char **argv) { << IROutputOnly.ArgStr << "\n"; return 1; } + if (IROutputOnly && DoPropGen) { + errs() << "error: -" << DoPropGen.ArgStr << " can't be used with -" + << IROutputOnly.ArgStr << "\n"; + return 1; + } if (IROutputOnly && DoParamInfo) { errs() << "error: -" << EmitKernelParamInfo.ArgStr << " can't be used with" << " -" << IROutputOnly.ArgStr << "\n"; diff --git a/sycl/test/basic_tests/SYCL-2020-spec-const-ids-order.cpp b/sycl/test/basic_tests/SYCL-2020-spec-const-ids-order.cpp index 0aefe26bd826c..2e3bac17a03c2 100644 --- a/sycl/test/basic_tests/SYCL-2020-spec-const-ids-order.cpp +++ b/sycl/test/basic_tests/SYCL-2020-spec-const-ids-order.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-device-only -c -o %t.bc %s -// RUN: sycl-post-link %t.bc -spec-const=emulation -S -o %t-split1.txt +// RUN: sycl-post-link -properties %t.bc -spec-const=emulation -S -o %t-split1.txt // RUN: cat %t-split1_0.ll | FileCheck %s -check-prefixes=CHECK-IR // RUN: cat %t-split1_0.prop | FileCheck %s 
-check-prefixes=CHECK-PROP // diff --git a/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp b/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp index b9d29507a8eac..136043dba5fd6 100644 --- a/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp +++ b/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx -fsycl -fsycl-device-only -c -o %t.bc %s -// RUN: %if asserts %{sycl-post-link -debug-only=SpecConst %t.bc -spec-const=native -o %t-split1.txt 2>&1 | FileCheck %s -check-prefixes=CHECK-LOG %} %else %{sycl-post-link %t.bc -spec-const=native -o %t-split1.txt 2>&1 %} +// RUN: %if asserts %{sycl-post-link -properties -debug-only=SpecConst %t.bc -spec-const=native -o %t-split1.txt 2>&1 | FileCheck %s -check-prefixes=CHECK-LOG %} %else %{sycl-post-link -properties %t.bc -spec-const=native -o %t-split1.txt 2>&1 %} // RUN: cat %t-split1_0.prop | FileCheck %s -check-prefixes=CHECK,CHECK-RT -// RUN: sycl-post-link %t.bc -spec-const=emulation -o %t-split2.txt +// RUN: sycl-post-link -properties %t.bc -spec-const=emulation -o %t-split2.txt // RUN: cat %t-split2_0.prop | FileCheck %s -check-prefixes=CHECK,CHECK-DEF // RUN: llvm-spirv -o %t-split1_0.spv -spirv-max-version=1.1 -spirv-ext=+all %t-split1_0.bc // RUN: llvm-spirv -o - --to-text %t-split1_0.spv | FileCheck %s -check-prefixes=CHECK-SPV diff --git a/sycl/test/basic_tests/sycl-kernel-save-user-names.cpp b/sycl/test/basic_tests/sycl-kernel-save-user-names.cpp index 4663aa2f55baf..5c3c0a771ecd8 100644 --- a/sycl/test/basic_tests/sycl-kernel-save-user-names.cpp +++ b/sycl/test/basic_tests/sycl-kernel-save-user-names.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-device-only -fno-discard-value-names -fno-sycl-early-optimizations -o %t.bc %s -// RUN: sycl-post-link %t.bc -spec-const=emulation -o %t.table +// RUN: sycl-post-link -properties %t.bc -spec-const=emulation -o %t.table // RUN: llvm-spirv -o %t.spv -spirv-max-version=1.3 -spirv-ext=+all %t.bc // RUN: llvm-spirv -o %t.rev.bc -r %t.spv 
// RUN: llvm-dis %t.rev.bc -o=- | FileCheck %s diff --git a/sycl/test/check_device_code/esimd/NBarrierAttr.cpp b/sycl/test/check_device_code/esimd/NBarrierAttr.cpp index 7f9c7a6114e33..174b870c9b965 100644 --- a/sycl/test/check_device_code/esimd/NBarrierAttr.cpp +++ b/sycl/test/check_device_code/esimd/NBarrierAttr.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -O2 -fsycl -fsycl-device-only -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -lower-esimd -O2 -S %t -o %t.table +// RUN: sycl-post-link -properties -lower-esimd -O2 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll // Checks NBarrier ESIMD intrinsic translation. diff --git a/sycl/test/check_device_code/esimd/dpas.cpp b/sycl/test/check_device_code/esimd/dpas.cpp index 0892200641bb6..7e2c2682d3988 100644 --- a/sycl/test/check_device_code/esimd/dpas.cpp +++ b/sycl/test/check_device_code/esimd/dpas.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsycl -c -Xclang -emit-llvm %s -o %t // RUN: %clangxx -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll #include diff --git a/sycl/test/check_device_code/esimd/fp16_converts.cpp b/sycl/test/check_device_code/esimd/fp16_converts.cpp index 83b1b86f96f67..61ed76a211837 100644 --- a/sycl/test/check_device_code/esimd/fp16_converts.cpp +++ b/sycl/test/check_device_code/esimd/fp16_converts.cpp @@ -5,7 +5,7 @@ // Checks that lowerESIMD pass builds proper vc-intrinsics // RUN: %clangxx -O2 -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll #include diff --git a/sycl/test/check_device_code/esimd/genx_func_attr.cpp b/sycl/test/check_device_code/esimd/genx_func_attr.cpp index 
00fb5d7171431..f92c5f40c36a3 100644 --- a/sycl/test/check_device_code/esimd/genx_func_attr.cpp +++ b/sycl/test/check_device_code/esimd/genx_func_attr.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -O2 -fsycl -fsycl-device-only -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -lower-esimd -O2 -S %t -o %t.table +// RUN: sycl-post-link -properties -lower-esimd -O2 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll // Checks ESIMD intrinsic translation. diff --git a/sycl/test/check_device_code/esimd/glob.cpp b/sycl/test/check_device_code/esimd/glob.cpp index b61a6e59830ec..2f005a7c71340 100644 --- a/sycl/test/check_device_code/esimd/glob.cpp +++ b/sycl/test/check_device_code/esimd/glob.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl -c -fsycl-device-only -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -O2 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll // This test checks that globals with register attribute are allowed in ESIMD diff --git a/sycl/test/check_device_code/esimd/intrins_trans.cpp b/sycl/test/check_device_code/esimd/intrins_trans.cpp index f42a49bfdd87e..57543e6025702 100644 --- a/sycl/test/check_device_code/esimd/intrins_trans.cpp +++ b/sycl/test/check_device_code/esimd/intrins_trans.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx %clang_O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx %clang_O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd 
-lower-esimd-force-stateless-mem=true -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=true -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD intrinsic translation with opaque pointers. diff --git a/sycl/test/check_device_code/esimd/lower-external-funcs.cpp b/sycl/test/check_device_code/esimd/lower-external-funcs.cpp index 580d1bff5b121..7a8e0a492c5fa 100644 --- a/sycl/test/check_device_code/esimd/lower-external-funcs.cpp +++ b/sycl/test/check_device_code/esimd/lower-external-funcs.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-device-only -S -emit-llvm -x c++ %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -O2 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll // This test checks that unreferenced SYCL_EXTERNAL functions are not dropped diff --git a/sycl/test/check_device_code/esimd/lsc.cpp b/sycl/test/check_device_code/esimd/lsc.cpp index 131952b2ee03a..e1e157225fe77 100644 --- a/sycl/test/check_device_code/esimd/lsc.cpp +++ b/sycl/test/check_device_code/esimd/lsc.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx -O0 -fsycl -fno-sycl-esimd-force-stateless-mem -fsycl-device-only -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx -O0 -fsycl -fsycl-esimd-force-stateless-mem -fsycl-device-only -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll 
--check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD intrinsic translation. diff --git a/sycl/test/check_device_code/esimd/memory_properties_atomic_update.cpp b/sycl/test/check_device_code/esimd/memory_properties_atomic_update.cpp index 403448e3677f0..d960dabc0b515 100644 --- a/sycl/test/check_device_code/esimd/memory_properties_atomic_update.cpp +++ b/sycl/test/check_device_code/esimd/memory_properties_atomic_update.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD memory functions accepting compile time properties for diff --git a/sycl/test/check_device_code/esimd/memory_properties_copytocopyfrom.cpp b/sycl/test/check_device_code/esimd/memory_properties_copytocopyfrom.cpp index 6f9ab411b616d..05d953357bade 100644 --- a/sycl/test/check_device_code/esimd/memory_properties_copytocopyfrom.cpp +++ b/sycl/test/check_device_code/esimd/memory_properties_copytocopyfrom.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd 
-lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD copy_to and copy_from functions accepting compile time diff --git a/sycl/test/check_device_code/esimd/memory_properties_gather.cpp b/sycl/test/check_device_code/esimd/memory_properties_gather.cpp index 5828ac743c24e..03b08b9e8e8b8 100644 --- a/sycl/test/check_device_code/esimd/memory_properties_gather.cpp +++ b/sycl/test/check_device_code/esimd/memory_properties_gather.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll 
--check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD memory functions accepting compile time properties for gather diff --git a/sycl/test/check_device_code/esimd/memory_properties_load_store.cpp b/sycl/test/check_device_code/esimd/memory_properties_load_store.cpp index c1e465536268f..5dacc5e5c99b2 100644 --- a/sycl/test/check_device_code/esimd/memory_properties_load_store.cpp +++ b/sycl/test/check_device_code/esimd/memory_properties_load_store.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD memory functions accepting compile time properties for diff --git a/sycl/test/check_device_code/esimd/memory_properties_prefetch_2d.cpp b/sycl/test/check_device_code/esimd/memory_properties_prefetch_2d.cpp index c9c5f33854057..03945af5a97b5 100644 --- a/sycl/test/check_device_code/esimd/memory_properties_prefetch_2d.cpp +++ b/sycl/test/check_device_code/esimd/memory_properties_prefetch_2d.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd 
-lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD memory functions accepting compile time properties for prefetch diff --git a/sycl/test/check_device_code/esimd/memory_properties_scatter.cpp b/sycl/test/check_device_code/esimd/memory_properties_scatter.cpp index 2bf1b0f4019c7..e97532d528549 100644 --- a/sycl/test/check_device_code/esimd/memory_properties_scatter.cpp +++ b/sycl/test/check_device_code/esimd/memory_properties_scatter.cpp @@ -1,9 +1,9 @@ // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fno-sycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem=false -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATEFUL // RUN: %clangxx -O0 -fsycl -fsycl-device-only -fsycl-esimd-force-stateless-mem -D__ESIMD_GATHER_SCATTER_LLVM_IR -Xclang -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -lower-esimd-force-stateless-mem -O0 -S %t -o %t.table // RUN: FileCheck %s 
-input-file=%t_esimd_0.ll --check-prefixes=CHECK,CHECK-STATELESS // Checks ESIMD memory functions accepting compile time properties for scatter diff --git a/sycl/test/check_device_code/esimd/slm_init_specconst_size.cpp b/sycl/test/check_device_code/esimd/slm_init_specconst_size.cpp index 39ecacb2fa663..f12404afc81fe 100644 --- a/sycl/test/check_device_code/esimd/slm_init_specconst_size.cpp +++ b/sycl/test/check_device_code/esimd/slm_init_specconst_size.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -O2 -fsycl -fsycl-device-only -emit-llvm %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -O2 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll // Checks that we set 0 as VCSLMSize when slm_init is used with // non-constant operand, like with specialization constant. diff --git a/sycl/test/check_device_code/native_cpu/offload-wrapper.cpp b/sycl/test/check_device_code/native_cpu/offload-wrapper.cpp index 50bab6804255e..98d4ce88e623f 100644 --- a/sycl/test/check_device_code/native_cpu/offload-wrapper.cpp +++ b/sycl/test/check_device_code/native_cpu/offload-wrapper.cpp @@ -1,7 +1,7 @@ // This test checks the output for the clang-offload-wrapper for the Native CPU // target: // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu %s -o %t.bc -// RUN: sycl-post-link -emit-param-info -symbols -emit-exported-symbols -O2 -spec-const=native -device-globals -o %t.table %t.bc +// RUN: sycl-post-link -properties -emit-param-info -symbols -emit-exported-symbols -O2 -spec-const=native -device-globals -o %t.table %t.bc // RUN: clang-offload-wrapper -o=%t_wrap.bc -host=x86_64-unknown-linux-gnu -target=native_cpu -kind=sycl -batch %t.table // RUN: llvm-dis %t_wrap.bc -o - | FileCheck %s diff --git a/sycl/test/esimd/sycl_half_basic_ops.cpp b/sycl/test/esimd/sycl_half_basic_ops.cpp index ae357936a6f54..224286c95213a 100644 --- a/sycl/test/esimd/sycl_half_basic_ops.cpp +++ 
b/sycl/test/esimd/sycl_half_basic_ops.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-device-only -S %s -o %t -// RUN: sycl-post-link -split-esimd -lower-esimd -S %t -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll // The test checks that there are no unexpected extra conversions or intrinsic diff --git a/sycl/test/esimd/sycl_half_math_ops.cpp b/sycl/test/esimd/sycl_half_math_ops.cpp index 38e645c703ebc..10a81279b535c 100644 --- a/sycl/test/esimd/sycl_half_math_ops.cpp +++ b/sycl/test/esimd/sycl_half_math_ops.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-device-only -S %s -o %t.ll -// RUN: sycl-post-link -split-esimd -lower-esimd -S %t.ll -o %t.table +// RUN: sycl-post-link -properties -split-esimd -lower-esimd -S %t.ll -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll // The test checks that there are no unexpected extra conversions or intrinsic diff --git a/sycl/test/extensions/private_alloca.cpp b/sycl/test/extensions/private_alloca.cpp index 3e3d3f91ddfc5..22f834dc5217c 100644 --- a/sycl/test/extensions/private_alloca.cpp +++ b/sycl/test/extensions/private_alloca.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-device-only -c -o %t.bc %s -// RUN: %if asserts %{sycl-post-link -debug-only=SpecConst %t.bc -spec-const=native -o %t.txt 2>&1 | FileCheck %s -check-prefixes=CHECK-LOG %} %else %{sycl-post-link %t.bc -spec-const=native -o %t.txt 2>&1 %} +// RUN: %if asserts %{sycl-post-link -properties -debug-only=SpecConst %t.bc -spec-const=native -o %t.txt 2>&1 | FileCheck %s -check-prefixes=CHECK-LOG %} %else %{sycl-post-link %t.bc -properties -spec-const=native -o %t.txt 2>&1 %} // RUN: cat %t_0.prop | FileCheck %s -check-prefixes=CHECK,CHECK-RT // RUN: llvm-spirv -o %t_0.spv -spirv-max-version=1.1 -spirv-ext=+all %t_0.bc // RUN: llvm-spirv -o - --to-text %t_0.spv | FileCheck %s -check-prefixes=CHECK-SPV diff --git 
a/sycl/test/optional_kernel_features/fp-accuracy.c b/sycl/test/optional_kernel_features/fp-accuracy.c index 80acc2baa893f..f1107acd59d72 100644 --- a/sycl/test/optional_kernel_features/fp-accuracy.c +++ b/sycl/test/optional_kernel_features/fp-accuracy.c @@ -3,7 +3,7 @@ // 1. Accuracy is specified for particular math functions. // RUN: %clangxx %s -o %test_func.bc -ffp-accuracy=high:sin,sqrt -ffp-accuracy=medium:cos -ffp-accuracy=low:tan -ffp-accuracy=cuda:exp,acos -ffp-accuracy=sycl:log,asin -fno-math-errno -fsycl -fsycl-device-only -// RUN: sycl-post-link -split=auto -symbols %test_func.bc -o %test_func.table +// RUN: sycl-post-link -properties -split=auto -symbols %test_func.bc -o %test_func.table // RUN: FileCheck %s -input-file=%test_func.table --check-prefixes CHECK-FUNC-TABLE // RUN: FileCheck %s -input-file=%test_func_0.sym --check-prefixes CHECK-FUNC-M0-SYMS // RUN: FileCheck %s -input-file=%test_func_1.sym --check-prefixes CHECK-FUNC-M1-SYMS @@ -14,14 +14,14 @@ // 2. Accuracy is specified for TU. // RUN: %clangxx %s -o %test_tu.bc -ffp-accuracy=high -fno-math-errno -fsycl -fsycl-device-only -// RUN: sycl-post-link -split=auto -symbols %test_tu.bc -o %test_tu.table +// RUN: sycl-post-link -properties -split=auto -symbols %test_tu.bc -o %test_tu.table // RUN: FileCheck %s -input-file=%test_tu.table --check-prefixes CHECK-TU-TABLE // RUN: FileCheck %s -input-file=%test_tu_0.sym --check-prefixes CHECK-TU-M0-SYMS // RUN: FileCheck %s -input-file=%test_tu_1.sym --check-prefixes CHECK-TU-M1-SYMS // 3. Mixed case. 
// RUN: %clangxx %s -o %test_mix.bc -ffp-accuracy=medium -ffp-accuracy=high:sin,sqrt -ffp-accuracy=medium:cos -ffp-accuracy=cuda:exp -ffp-accuracy=sycl:log -fno-math-errno -fsycl -fsycl-device-only -// RUN: sycl-post-link -split=auto -symbols %test_mix.bc -o %test_mix.table +// RUN: sycl-post-link -properties -split=auto -symbols %test_mix.bc -o %test_mix.table // RUN: FileCheck %s -input-file=%test_mix.table --check-prefixes CHECK-MIX-TABLE // RUN: FileCheck %s -input-file=%test_mix_0.sym --check-prefixes CHECK-MIX-M0-SYMS // RUN: FileCheck %s -input-file=%test_mix_1.sym --check-prefixes CHECK-MIX-M1-SYMS diff --git a/sycl/test/optional_kernel_features/fp-accuracy.cpp b/sycl/test/optional_kernel_features/fp-accuracy.cpp index 80acc2baa893f..f1107acd59d72 100644 --- a/sycl/test/optional_kernel_features/fp-accuracy.cpp +++ b/sycl/test/optional_kernel_features/fp-accuracy.cpp @@ -3,7 +3,7 @@ // 1. Accuracy is specified for particular math functions. // RUN: %clangxx %s -o %test_func.bc -ffp-accuracy=high:sin,sqrt -ffp-accuracy=medium:cos -ffp-accuracy=low:tan -ffp-accuracy=cuda:exp,acos -ffp-accuracy=sycl:log,asin -fno-math-errno -fsycl -fsycl-device-only -// RUN: sycl-post-link -split=auto -symbols %test_func.bc -o %test_func.table +// RUN: sycl-post-link -properties -split=auto -symbols %test_func.bc -o %test_func.table // RUN: FileCheck %s -input-file=%test_func.table --check-prefixes CHECK-FUNC-TABLE // RUN: FileCheck %s -input-file=%test_func_0.sym --check-prefixes CHECK-FUNC-M0-SYMS // RUN: FileCheck %s -input-file=%test_func_1.sym --check-prefixes CHECK-FUNC-M1-SYMS @@ -14,14 +14,14 @@ // 2. Accuracy is specified for TU. 
// RUN: %clangxx %s -o %test_tu.bc -ffp-accuracy=high -fno-math-errno -fsycl -fsycl-device-only -// RUN: sycl-post-link -split=auto -symbols %test_tu.bc -o %test_tu.table +// RUN: sycl-post-link -properties -split=auto -symbols %test_tu.bc -o %test_tu.table // RUN: FileCheck %s -input-file=%test_tu.table --check-prefixes CHECK-TU-TABLE // RUN: FileCheck %s -input-file=%test_tu_0.sym --check-prefixes CHECK-TU-M0-SYMS // RUN: FileCheck %s -input-file=%test_tu_1.sym --check-prefixes CHECK-TU-M1-SYMS // 3. Mixed case. // RUN: %clangxx %s -o %test_mix.bc -ffp-accuracy=medium -ffp-accuracy=high:sin,sqrt -ffp-accuracy=medium:cos -ffp-accuracy=cuda:exp -ffp-accuracy=sycl:log -fno-math-errno -fsycl -fsycl-device-only -// RUN: sycl-post-link -split=auto -symbols %test_mix.bc -o %test_mix.table +// RUN: sycl-post-link -properties -split=auto -symbols %test_mix.bc -o %test_mix.table // RUN: FileCheck %s -input-file=%test_mix.table --check-prefixes CHECK-MIX-TABLE // RUN: FileCheck %s -input-file=%test_mix_0.sym --check-prefixes CHECK-MIX-M0-SYMS // RUN: FileCheck %s -input-file=%test_mix_1.sym --check-prefixes CHECK-MIX-M1-SYMS From 0b593416cbf0f98a25289235443db1e5bdbf421e Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Fri, 28 Jun 2024 09:21:44 -0700 Subject: [PATCH 20/40] [SYCL] Remove deprecated interop interfaces (#13306) --- sycl/include/sycl/backend.hpp | 5 +- sycl/include/sycl/backend/opencl.hpp | 44 -------- .../sycl/ext/oneapi/backend/level_zero.hpp | 100 ++++-------------- sycl/source/backend.cpp | 14 ++- sycl/source/backend/level_zero.cpp | 56 +--------- sycl/source/backend/opencl.cpp | 28 ----- .../interop-level-zero-keep-ownership.cpp | 11 +- sycl/test-e2e/Plugin/interop-opencl.cpp | 9 +- sycl/test-e2e/Regression/cache_test.cpp | 79 +++++--------- sycl/test/abi/sycl_symbols_linux.dump | 12 +-- sycl/test/abi/sycl_symbols_windows.dump | 12 +-- .../basic_tests/interop-level-zero-2020.cpp | 18 +--- 12 files changed, 79 insertions(+), 309 deletions(-) diff --git 
a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index efbc03d6a2ab6..7fab7ed12c8c3 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -263,7 +263,8 @@ __SYCL_EXPORT device make_device(pi_native_handle NativeHandle, backend Backend); __SYCL_EXPORT context make_context(pi_native_handle NativeHandle, const async_handler &Handler, - backend Backend); + backend Backend, bool KeepOwnership, + const std::vector &DeviceList = {}); __SYCL_EXPORT queue make_queue(pi_native_handle NativeHandle, int32_t nativeHandleDesc, const context &TargetContext, @@ -328,7 +329,7 @@ make_context( &BackendObject, const async_handler &Handler = {}) { return detail::make_context(detail::pi::cast(BackendObject), - Handler, Backend); + Handler, Backend, false /* KeepOwnership */); } template diff --git a/sycl/include/sycl/backend/opencl.hpp b/sycl/include/sycl/backend/opencl.hpp index 9376498580fc7..66d1b04c6d925 100644 --- a/sycl/include/sycl/backend/opencl.hpp +++ b/sycl/include/sycl/backend/opencl.hpp @@ -24,54 +24,10 @@ namespace sycl { inline namespace _V1 { namespace opencl { -// Implementation of various "make" functions resides in SYCL RT because -// creating SYCL objects requires knowing details not accessible here. -// Note that they take opaque pi_native_handle that real OpenCL handles -// are casted to. -// -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle); -__SYCL_EXPORT device make_device(pi_native_handle NativeHandle); -__SYCL_EXPORT context make_context(pi_native_handle NativeHandle); -__SYCL_EXPORT queue make_queue(const context &Context, - pi_native_handle InteropHandle); - __SYCL_EXPORT bool has_extension(const sycl::platform &SyclPlatform, const std::string &Extension); __SYCL_EXPORT bool has_extension(const sycl::device &SyclDevice, const std::string &Extension); - -// Construction of SYCL platform. 
-template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_platform free function") -T make(typename detail::interop::type Interop) { - return make_platform(detail::pi::cast(Interop)); -} - -// Construction of SYCL device. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_device free function") -T make(typename detail::interop::type Interop) { - return make_device(detail::pi::cast(Interop)); -} - -// Construction of SYCL context. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_context free function") -T make(typename detail::interop::type Interop) { - return make_context(detail::pi::cast(Interop)); -} - -// Construction of SYCL queue. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_queue free function") -T make(const context &Context, - typename detail::interop::type Interop) { - return make_queue(Context, detail::pi::cast(Interop)); -} } // namespace opencl } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp b/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp index b63a2e2225173..a37cf98fd38f0 100644 --- a/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp +++ b/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp @@ -41,76 +41,10 @@ namespace sycl { inline namespace _V1 { -namespace ext::oneapi::level_zero { -// Implementation of various "make" functions resides in libsycl.so and thus -// their interface needs to be backend agnostic. 
-// TODO: remove/merge with similar functions in sycl::detail -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle); +namespace ext::oneapi::level_zero::detail { __SYCL_EXPORT device make_device(const platform &Platform, pi_native_handle NativeHandle); -__SYCL_EXPORT context make_context(const std::vector &DeviceList, - pi_native_handle NativeHandle, - bool keep_ownership = false); -__SYCL_EXPORT queue make_queue(const context &Context, const device &Device, - pi_native_handle InteropHandle, - bool IsImmCmdList, bool keep_ownership, - const property_list &Properties); -__SYCL_EXPORT event make_event(const context &Context, - pi_native_handle InteropHandle, - bool keep_ownership = false); - -// Construction of SYCL platform. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_platform free function") -T make(typename sycl::detail::interop::type - Interop) { - return make_platform(reinterpret_cast(Interop)); -} - -// Construction of SYCL device. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_device free function") -T make(const platform &Platform, - typename sycl::detail::interop::type - Interop) { - return make_device(Platform, reinterpret_cast(Interop)); -} - -/// Construction of SYCL context. -/// \param DeviceList is a vector of devices which must be encapsulated by -/// created SYCL context. Provided devices and native context handle must -/// be associated with the same platform. -/// \param Interop is a Level Zero native context handle. -/// \param Ownership (optional) specifies who will assume ownership of the -/// native context handle. Default is that SYCL RT does, so it destroys -/// the native handle when the created SYCL object goes out of life. 
-/// -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_context free function") -T make(const std::vector &DeviceList, - typename sycl::detail::interop::type - Interop, - ownership Ownership = ownership::transfer) { - return make_context(DeviceList, - sycl::detail::pi::cast(Interop), - Ownership == ownership::keep); -} - -// Construction of SYCL event. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_event free function") -T make(const context &Context, - typename sycl::detail::interop::type - Interop, - ownership Ownership = ownership::transfer) { - return make_event(Context, reinterpret_cast(Interop), - Ownership == ownership::keep); -} - -} // namespace ext::oneapi::level_zero +} // namespace ext::oneapi::level_zero::detail // Specialization of sycl::make_context for Level-Zero backend. template <> @@ -118,11 +52,16 @@ inline context make_context( const backend_input_t &BackendObject, const async_handler &Handler) { - (void)Handler; - return ext::oneapi::level_zero::make_context( - BackendObject.DeviceList, - detail::pi::cast(BackendObject.NativeHandle), - BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep); + + const std::vector &DeviceList = BackendObject.DeviceList; + pi_native_handle NativeHandle = + detail::pi::cast(BackendObject.NativeHandle); + bool KeepOwnership = + BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep; + + return sycl::detail::make_context(NativeHandle, Handler, + backend::ext_oneapi_level_zero, + KeepOwnership, DeviceList); } namespace detail { @@ -191,7 +130,6 @@ template <> inline queue make_queue( const backend_input_t &BackendObject, const context &TargetContext, const async_handler Handler) { - (void)Handler; const device Device = device{BackendObject.Device}; bool IsImmCmdList = std::holds_alternative( BackendObject.NativeHandle); @@ -202,10 +140,11 @@ inline queue make_queue( : reinterpret_cast( *(std::get_if( &BackendObject.NativeHandle))); - return 
ext::oneapi::level_zero::make_queue( - TargetContext, Device, Handle, IsImmCmdList, + + return sycl::detail::make_queue( + Handle, IsImmCmdList, TargetContext, &Device, BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep, - BackendObject.Properties); + BackendObject.Properties, Handler, backend::ext_oneapi_level_zero); } // Specialization of sycl::get_native for Level-Zero backend. @@ -227,10 +166,11 @@ template <> inline event make_event( const backend_input_t &BackendObject, const context &TargetContext) { - return ext::oneapi::level_zero::make_event( - TargetContext, + return sycl::detail::make_event( detail::pi::cast(BackendObject.NativeHandle), - BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep); + TargetContext, + BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep, + backend::ext_oneapi_level_zero); } // Specialization of sycl::make_kernel_bundle for Level-Zero backend. diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index ee21740484af9..cb142906a5970 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -91,15 +91,21 @@ __SYCL_EXPORT device make_device(pi_native_handle NativeHandle, __SYCL_EXPORT context make_context(pi_native_handle NativeHandle, const async_handler &Handler, - backend Backend) { + backend Backend, bool KeepOwnership, + const std::vector &DeviceList) { const auto &Plugin = getPlugin(Backend); pi::PiContext PiContext = nullptr; + std::vector DeviceHandles; + for (auto Dev : DeviceList) { + DeviceHandles.push_back(detail::getSyclObjImpl(Dev)->getHandleRef()); + } Plugin->call( - NativeHandle, 0, nullptr, false, &PiContext); + NativeHandle, DeviceHandles.size(), DeviceHandles.data(), false, + &PiContext); // Construct the SYCL context from PI context. 
- return detail::createSyclObjFromImpl( - std::make_shared(PiContext, Handler, Plugin)); + return detail::createSyclObjFromImpl(std::make_shared( + PiContext, Handler, Plugin, DeviceList, !KeepOwnership)); } __SYCL_EXPORT queue make_queue(pi_native_handle NativeHandle, diff --git a/sycl/source/backend/level_zero.cpp b/sycl/source/backend/level_zero.cpp index 7f43f12c0cc4e..ade630ba426c2 100644 --- a/sycl/source/backend/level_zero.cpp +++ b/sycl/source/backend/level_zero.cpp @@ -15,17 +15,9 @@ namespace sycl { inline namespace _V1 { -namespace ext::oneapi::level_zero { -using namespace detail; +namespace ext::oneapi::level_zero::detail { +using namespace sycl::detail; -//---------------------------------------------------------------------------- -// Implementation of level_zero::make -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { - return detail::make_platform(NativeHandle, backend::ext_oneapi_level_zero); -} - -//---------------------------------------------------------------------------- -// Implementation of level_zero::make __SYCL_EXPORT device make_device(const platform &Platform, pi_native_handle NativeHandle) { const auto &Plugin = pi::getPlugin(); @@ -39,48 +31,6 @@ __SYCL_EXPORT device make_device(const platform &Platform, PlatformImpl->getOrMakeDeviceImpl(PiDevice, PlatformImpl)); } -//---------------------------------------------------------------------------- -// Implementation of level_zero::make -__SYCL_EXPORT context make_context(const std::vector &DeviceList, - pi_native_handle NativeHandle, - bool KeepOwnership) { - const auto &Plugin = pi::getPlugin(); - // Create PI context first. - pi_context PiContext; - std::vector DeviceHandles; - for (auto Dev : DeviceList) { - DeviceHandles.push_back(detail::getSyclObjImpl(Dev)->getHandleRef()); - } - Plugin->call( - NativeHandle, DeviceHandles.size(), DeviceHandles.data(), !KeepOwnership, - &PiContext); - // Construct the SYCL context from PI context. 
- return detail::createSyclObjFromImpl( - std::make_shared(PiContext, detail::defaultAsyncHandler, - Plugin, DeviceList, !KeepOwnership)); -} - -//---------------------------------------------------------------------------- -// Implementation of level_zero::make -__SYCL_EXPORT queue make_queue(const context &Context, const device &Device, - pi_native_handle NativeHandle, bool IsImmCmdList, - bool KeepOwnership, - const property_list &Properties) { - const auto &ContextImpl = getSyclObjImpl(Context); - return detail::make_queue( - NativeHandle, IsImmCmdList, Context, &Device, KeepOwnership, Properties, - ContextImpl->get_async_handler(), backend::ext_oneapi_level_zero); -} - -//---------------------------------------------------------------------------- -// Implementation of level_zero::make -__SYCL_EXPORT event make_event(const context &Context, - pi_native_handle NativeHandle, - bool KeepOwnership) { - return detail::make_event(NativeHandle, Context, KeepOwnership, - backend::ext_oneapi_level_zero); -} - -} // namespace ext::oneapi::level_zero +} // namespace ext::oneapi::level_zero::detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/backend/opencl.cpp b/sycl/source/backend/opencl.cpp index 5a282542c5b24..9bdd8bfaeb317 100644 --- a/sycl/source/backend/opencl.cpp +++ b/sycl/source/backend/opencl.cpp @@ -21,34 +21,6 @@ inline namespace _V1 { namespace opencl { using namespace detail; -//---------------------------------------------------------------------------- -// Implementation of opencl::make -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { - return detail::make_platform(NativeHandle, backend::opencl); -} - -//---------------------------------------------------------------------------- -// Implementation of opencl::make -__SYCL_EXPORT device make_device(pi_native_handle NativeHandle) { - return detail::make_device(NativeHandle, backend::opencl); -} - 
-//---------------------------------------------------------------------------- -// Implementation of opencl::make -__SYCL_EXPORT context make_context(pi_native_handle NativeHandle) { - return detail::make_context(NativeHandle, detail::defaultAsyncHandler, - backend::opencl); -} - -//---------------------------------------------------------------------------- -// Implementation of opencl::make -__SYCL_EXPORT queue make_queue(const context &Context, - pi_native_handle NativeHandle) { - const auto &ContextImpl = getSyclObjImpl(Context); - return detail::make_queue(NativeHandle, 0, Context, nullptr, false, {}, - ContextImpl->get_async_handler(), backend::opencl); -} - //---------------------------------------------------------------------------- // Free functions to query OpenCL backend extensions __SYCL_EXPORT bool has_extension(const sycl::platform &SyclPlatform, diff --git a/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp b/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp index befdb27f439e2..cfa4f0f53e060 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp @@ -30,8 +30,9 @@ int main() { { // Scope in which SYCL interop context object is live std::vector Devices{}; Devices.push_back(Device); - auto Context = level_zero::make(Devices, ZeContext, - level_zero::ownership::keep); + auto Context = make_context( + backend_input_t{ + ZeContext, Devices, ext::oneapi::level_zero::ownership::keep}); // Create L0 event pool ze_event_pool_handle_t ZeEventPool; @@ -52,8 +53,10 @@ int main() { { // Scope in which SYCL interop event is alive int i = 0; - event Event = level_zero::make(Context, ZeEvent, - level_zero::ownership::keep); + event Event = make_event( + backend_input_t{ + ZeEvent, ext::oneapi::level_zero::ownership::keep}, + Context); info::event_command_status status; do { diff --git a/sycl/test-e2e/Plugin/interop-opencl.cpp 
b/sycl/test-e2e/Plugin/interop-opencl.cpp index 9c52bc0ecf8b0..faa5ae348191a 100644 --- a/sycl/test-e2e/Plugin/interop-opencl.cpp +++ b/sycl/test-e2e/Plugin/interop-opencl.cpp @@ -47,10 +47,11 @@ int main() { assert(ocl_buffers.size() == 1); // Re-create SYCL objects from native OpenCL handles - auto PlatformInterop = opencl::make(ocl_platform); - auto DeviceInterop = opencl::make(ocl_device); - auto ContextInterop = opencl::make(ocl_context); - auto QueueInterop = opencl::make(ContextInterop, ocl_queue); + auto PlatformInterop = sycl::make_platform(ocl_platform); + auto DeviceInterop = sycl::make_device(ocl_device); + auto ContextInterop = sycl::make_context(ocl_context); + auto QueueInterop = + sycl::make_queue(ocl_queue, ContextInterop); auto BufferInterop = sycl::make_buffer(ocl_buffers[0], ContextInterop); diff --git a/sycl/test-e2e/Regression/cache_test.cpp b/sycl/test-e2e/Regression/cache_test.cpp index 2bbf7a6431746..72304a2444fec 100644 --- a/sycl/test-e2e/Regression/cache_test.cpp +++ b/sycl/test-e2e/Regression/cache_test.cpp @@ -91,64 +91,32 @@ int queryFromNativeHandle(std::vector *platform_list, zeDeviceGet(l0_drivers[0], &l0_device_count, l0_devices.data()); // Create the platform and device objects using the native handle. - { - // Using deprecated level_zero-specific interface. Intentionally copy-pasted - // and not outlined into a helper because the deprecated interface will be - // removed in a few months. - auto plt = level_zero::make(l0_drivers[0]); - auto dev = level_zero::make(plt, l0_devices[0]); - - // Check to see if this platform is in the platform list. 
- std::cout << "Platform created with native handle: " - << plt.get_info() << std::endl; - auto plt_result = std::find_if(platform_list->begin(), platform_list->end(), - [&](sycl::platform &p) { return p == plt; }); - if (plt_result != platform_list->end()) { - std::cout << "The platform list contains: " - << plt.get_info() << std::endl; - } else { - std::cout << plt.get_info() - << " was not in the platform list.\n"; - failures++; - } + auto plt = + sycl::make_platform(l0_drivers[0]); + auto dev = + sycl::make_device(l0_devices[0]); - // Check to see if this device is in the device list. - std::cout << "Device created with native handle: " - << dev.get_info() << std::endl; - auto dev_result = std::find_if(device_list->begin(), device_list->end(), - [&](sycl::device &d) { return d == dev; }); - if (dev_result != device_list->end()) { - std::cout << "The device list contains: " - << dev.get_info() << std::endl; - } else { - std::cout << dev.get_info() - << " was not in the device list.\n"; - failures++; - } - } - { - // Using SYCL2020 interface. - auto plt = sycl::make_platform( - l0_drivers[0]); - auto dev = - sycl::make_device(l0_devices[0]); - - // Check to see if this platform is in the platform list. - std::cout << "Platform created with native handle: " + // Check to see if this platform is in the platform list. 
+ std::cout << "Platform created with native handle: " + << plt.get_info() << std::endl; + auto plt_result = std::find_if(platform_list->begin(), platform_list->end(), + [&](sycl::platform &p) { return p == plt; }); + if (plt_result != platform_list->end()) { + std::cout << "The platform list contains: " << plt.get_info() << std::endl; - auto plt_result = std::find_if(platform_list->begin(), platform_list->end(), - [&](sycl::platform &p) { return p == plt; }); - if (plt_result != platform_list->end()) { - std::cout << "The platform list contains: " - << plt.get_info() << std::endl; - } else { - std::cout << plt.get_info() - << " was not in the platform list.\n"; - failures++; - } + } else { + std::cout << plt.get_info() + << " was not in the platform list.\n"; + failures++; + } - // Check to see if this device is in the device list. - std::cout << "Device created with native handle: " + // Check to see if this device is in the device list. + std::cout << "Device created with native handle: " + << dev.get_info() << std::endl; + auto dev_result = std::find_if(device_list->begin(), device_list->end(), + [&](sycl::device &d) { return d == dev; }); + if (dev_result != device_list->end()) { + std::cout << "The device list contains: " << dev.get_info() << std::endl; auto dev_result = std::find_if(device_list->begin(), device_list->end(), [&](sycl::device &d) { return d == dev; }); @@ -177,6 +145,7 @@ int queryFromNativeHandle(std::vector *platform_list, failures++; } } + return failures; } diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2ff3670ba94df..a852910bd4431 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2988,11 +2988,7 @@ _ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE0EE 
_ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE1EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISE_EEEEES8_IhSaIhEERKSE_DpRKT_ _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE -_ZN4sycl3_V13ext6oneapi10level_zero10make_eventERKNS0_7contextEmb -_ZN4sycl3_V13ext6oneapi10level_zero10make_queueERKNS0_7contextERKNS0_6deviceEmbbRKNS0_13property_listE -_ZN4sycl3_V13ext6oneapi10level_zero11make_deviceERKNS0_8platformEm -_ZN4sycl3_V13ext6oneapi10level_zero12make_contextERKSt6vectorINS0_6deviceESaIS5_EEmb -_ZN4sycl3_V13ext6oneapi10level_zero13make_platformEm +_ZN4sycl3_V13ext6oneapi10level_zero6detail11make_deviceERKNS0_8platformEm _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_22bindless_image_samplerERKNS3_16image_descriptorERKNS0_5queueE @@ -3258,7 +3254,7 @@ _ZN4sycl3_V16detail12buffer_plainC2EmNS0_7contextESt10unique_ptrINS1_19SYCLMemOb _ZN4sycl3_V16detail12buffer_plainC2EmmRKNS0_13property_listESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS7_EE _ZN4sycl3_V16detail12compile_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERKSt6vectorINS0_6deviceESaIS8_EERKNS0_13property_listE _ZN4sycl3_V16detail12isOutOfRangeENS0_3vecIiLi4EEENS0_15addressing_modeENS0_5rangeILi3EEE -_ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendE +_ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendEbRKSt6vectorINS0_6deviceESaISA_EE _ZN4sycl3_V16detail12sampler_impl18getOrCreateSamplerERKNS0_7contextE 
_ZN4sycl3_V16detail12sampler_implC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V16detail12sampler_implC1EP11_cl_samplerRKNS0_7contextE @@ -3523,12 +3519,8 @@ _ZN4sycl3_V16native13__divide_implENS0_3vecIfLi3EEES3_ _ZN4sycl3_V16native13__divide_implENS0_3vecIfLi4EEES3_ _ZN4sycl3_V16native13__divide_implENS0_3vecIfLi8EEES3_ _ZN4sycl3_V16native13__divide_implEff -_ZN4sycl3_V16opencl10make_queueERKNS0_7contextEm -_ZN4sycl3_V16opencl11make_deviceEm -_ZN4sycl3_V16opencl12make_contextEm _ZN4sycl3_V16opencl13has_extensionERKNS0_6deviceERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16opencl13has_extensionERKNS0_8platformERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE -_ZN4sycl3_V16opencl13make_platformEm _ZN4sycl3_V16streamC1EmmRNS0_7handlerE _ZN4sycl3_V16streamC1EmmRNS0_7handlerERKNS0_13property_listE _ZN4sycl3_V16streamC2EmmRNS0_7handlerE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 45efa2732ec3a..c0d564ebe65e4 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4440,27 +4440,19 @@ ?lgamma_r_impl@detail@_V1@sycl@@YANNPEAH@Z ?link_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z ?makeDir@OSUtil@detail@_V1@sycl@@SAHPEBD@Z -?make_context@detail@_V1@sycl@@YA?AVcontext@23@_KAEBV?$function@$$A6AXVexception_list@_V1@sycl@@@Z@std@@W4backend@23@@Z -?make_context@level_zero@oneapi@ext@_V1@sycl@@YA?AVcontext@45@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@_K_N@Z -?make_context@opencl@_V1@sycl@@YA?AVcontext@23@_K@Z 
+?make_context@detail@_V1@sycl@@YA?AVcontext@23@_KAEBV?$function@$$A6AXVexception_list@_V1@sycl@@@Z@std@@W4backend@23@_NAEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@6@@Z ?make_device@detail@_V1@sycl@@YA?AVdevice@23@_KW4backend@23@@Z -?make_device@level_zero@oneapi@ext@_V1@sycl@@YA?AVdevice@45@AEBVplatform@45@_K@Z -?make_device@opencl@_V1@sycl@@YA?AVdevice@23@_K@Z +?make_device@detail@level_zero@oneapi@ext@_V1@sycl@@YA?AVdevice@56@AEBVplatform@56@_K@Z ?make_edge@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEAVnode@34567@0@Z ?make_error_code@_V1@sycl@@YA?AVerror_code@std@@W4errc@12@@Z ?make_event@detail@_V1@sycl@@YA?AVevent@23@_KAEBVcontext@23@W4backend@23@@Z ?make_event@detail@_V1@sycl@@YA?AVevent@23@_KAEBVcontext@23@_NW4backend@23@@Z -?make_event@level_zero@oneapi@ext@_V1@sycl@@YA?AVevent@45@AEBVcontext@45@_K_N@Z ?make_kernel@detail@_V1@sycl@@YA?AVkernel@23@AEBVcontext@23@AEBV?$kernel_bundle@$01@23@_K_NW4backend@23@@Z ?make_kernel@detail@_V1@sycl@@YA?AVkernel@23@_KAEBVcontext@23@W4backend@23@@Z ?make_kernel_bundle@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@_KAEBVcontext@23@W4bundle_state@23@W4backend@23@@Z ?make_kernel_bundle@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@_KAEBVcontext@23@_NW4bundle_state@23@W4backend@23@@Z ?make_platform@detail@_V1@sycl@@YA?AVplatform@23@_KW4backend@23@@Z -?make_platform@level_zero@oneapi@ext@_V1@sycl@@YA?AVplatform@45@_K@Z -?make_platform@opencl@_V1@sycl@@YA?AVplatform@23@_K@Z ?make_queue@detail@_V1@sycl@@YA?AVqueue@23@_KHAEBVcontext@23@PEBVdevice@23@_NAEBVproperty_list@23@AEBV?$function@$$A6AXVexception_list@_V1@sycl@@@Z@std@@W4backend@23@@Z -?make_queue@level_zero@oneapi@ext@_V1@sycl@@YA?AVqueue@45@AEBVcontext@45@AEBVdevice@45@_K_N3AEBVproperty_list@45@@Z -?make_queue@opencl@_V1@sycl@@YA?AVqueue@23@AEBVcontext@23@_K@Z 
?malloc@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBUcode_location@detail@12@@Z ?malloc@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z ?malloc@_V1@sycl@@YAPEAX_KAEBVqueue@12@W4alloc@usm@12@AEBUcode_location@detail@12@@Z diff --git a/sycl/test/basic_tests/interop-level-zero-2020.cpp b/sycl/test/basic_tests/interop-level-zero-2020.cpp index 1f7c2a83d578b..1a39b8acb16b1 100644 --- a/sycl/test/basic_tests/interop-level-zero-2020.cpp +++ b/sycl/test/basic_tests/interop-level-zero-2020.cpp @@ -1,12 +1,13 @@ // RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s // RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note -D__SYCL_INTERNAL_API %s +// expected-no-diagnostics + // Test for SYCL-2020 Level Zero interop API -// clang-format off #include + #include -// clang-format on #include @@ -122,18 +123,5 @@ int main() { {KernelBundle, ZeKernel, ext::oneapi::level_zero::ownership::keep}, Context); - // Check deprecated - // expected-warning@+1 {{'make' is deprecated: Use SYCL 2020 sycl::make_platform free function}} - auto P = ext::oneapi::level_zero::make(ZeDriver); - // expected-warning@+1 {{'make' is deprecated: Use SYCL 2020 sycl::make_device free function}} - auto D = ext::oneapi::level_zero::make(P, ZeDevice); - // expected-warning@+1 {{'make' is deprecated: Use SYCL 2020 sycl::make_context free function}} - auto C = ext::oneapi::level_zero::make( - std::vector(1, D), ZeContext, - ext::oneapi::level_zero::ownership::keep); - // expected-warning@+1 {{'make' is deprecated: Use SYCL 2020 sycl::make_event free function}} - auto E = ext::oneapi::level_zero::make( - Context, ZeEvent, ext::oneapi::level_zero::ownership::keep); - return 0; } From 7ce48cf412fd53a0e4234989a57c7b5ecf37840e Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Fri, 28 Jun 2024 12:25:41 -0400 Subject: [PATCH 21/40] 
[SYCL][ESIMD][E2E] Disable atomic_update test on Windows until new driver (#14334) A similar test was disabled in https://github.com/intel/llvm/commit/672b225e137b15de2465b62837ba32d17ab750a8, and this test was found to have the same issue. Signed-off-by: Sarnie, Nick --- .../unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test-e2e/ESIMD/unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp b/sycl/test-e2e/ESIMD/unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp index ea7f851f5c709..9ee3932f8689d 100644 --- a/sycl/test-e2e/ESIMD/unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp +++ b/sycl/test-e2e/ESIMD/unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp @@ -7,6 +7,7 @@ //===---------------------------------------------------------------------===// // REQUIRES: gpu-intel-pvc || gpu-intel-dg2 +// REQUIRES-INTEL-DRIVER: win: 101.5660 // RUN: %{build} -o %t.out // RUN: %{run} %t.out From deeb66413a6db7fb18082450bfad5e94a727c7f9 Mon Sep 17 00:00:00 2001 From: David Garcia Orozco Date: Fri, 28 Jun 2024 14:04:58 -0400 Subject: [PATCH 22/40] [SYCL][E2E] Remove subgroup supported checks from e2e tests (#14313) Subgroups are core sycl functionality which should be tested on all backends. 
--- sycl/test-e2e/Basic/linear-sub_group.cpp | 5 -- .../Regression/get_subgroup_sizes.cpp | 18 +++-- sycl/test-e2e/SubGroup/attributes.cpp | 22 ++---- sycl/test-e2e/SubGroup/helper.hpp | 21 ------ sycl/test-e2e/SubGroup/info.cpp | 74 +++++++++---------- sycl/test-e2e/SubGroup/reduce.cpp | 4 - sycl/test-e2e/SubGroup/reduce_fp16.cpp | 4 - sycl/test-e2e/SubGroup/reduce_fp64.cpp | 4 - sycl/test-e2e/SubGroup/reduce_spirv13.cpp | 4 - .../test-e2e/SubGroup/reduce_spirv13_fp16.cpp | 4 - .../test-e2e/SubGroup/reduce_spirv13_fp64.cpp | 4 - sycl/test-e2e/SubGroup/scan.cpp | 4 - sycl/test-e2e/SubGroup/scan_fp16.cpp | 4 - sycl/test-e2e/SubGroup/scan_fp64.cpp | 4 - sycl/test-e2e/SubGroup/scan_spirv13.cpp | 4 - sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp | 4 - sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp | 4 - sycl/test-e2e/SubGroup/vote.cpp | 4 - 18 files changed, 55 insertions(+), 137 deletions(-) diff --git a/sycl/test-e2e/Basic/linear-sub_group.cpp b/sycl/test-e2e/Basic/linear-sub_group.cpp index df99664c8f9b9..a7b5f8c720732 100644 --- a/sycl/test-e2e/Basic/linear-sub_group.cpp +++ b/sycl/test-e2e/Basic/linear-sub_group.cpp @@ -9,7 +9,6 @@ // //===----------------------------------------------------------------------===// -#include "../SubGroup/helper.hpp" #include #include #include @@ -20,10 +19,6 @@ using namespace sycl; int main(int argc, char *argv[]) { queue q; - if (!core_sg_supported(q.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } // Fill output array with sub-group IDs const uint32_t outer = 2; diff --git a/sycl/test-e2e/Regression/get_subgroup_sizes.cpp b/sycl/test-e2e/Regression/get_subgroup_sizes.cpp index eb910425ea8d4..38a07ce20ef79 100644 --- a/sycl/test-e2e/Regression/get_subgroup_sizes.cpp +++ b/sycl/test-e2e/Regression/get_subgroup_sizes.cpp @@ -1,3 +1,9 @@ +// UNSUPPORTED: accelerator +// TODO: FPGAs currently report `sub_group_sizes` as non-empty list, +// despite not having extension `cl_intel_required_subgroup_size` +// UNSUPPORTED: 
cuda || hip +// TODO: Similar issue to FPGAs + // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -18,13 +24,15 @@ int main() { queue Q; auto Dev = Q.get_device(); auto Vec = Dev.get_info(); + std::vector SubGroupSizes = + Dev.get_info(); if (std::find(Vec.begin(), Vec.end(), "cl_intel_required_subgroup_size") != std::end(Vec)) { - std::vector SubGroupSizes = - Dev.get_info(); - std::vector::const_iterator MaxIter = - std::max_element(SubGroupSizes.begin(), SubGroupSizes.end()); - int MaxSubGroup_size = *MaxIter; + assert(!SubGroupSizes.empty() && + "Required sub-group size list should not be empty"); + } else { + assert(SubGroupSizes.empty() && + "Required sub-group size list should be empty"); } return 0; } diff --git a/sycl/test-e2e/SubGroup/attributes.cpp b/sycl/test-e2e/SubGroup/attributes.cpp index 44de853e07f91..a4503726c1dec 100644 --- a/sycl/test-e2e/SubGroup/attributes.cpp +++ b/sycl/test-e2e/SubGroup/attributes.cpp @@ -1,3 +1,10 @@ +// UNSUPPORTED: accelerator +// TODO: FPGAs currently report supported subgroups as {4,8,16,32,64}, causing +// this test to fail +// UNSUPPORTED: cuda || hip +// TODO: Device subgroup sizes reports {32}, but when we try to use it with a +// kernel attribute and check it, we get a subgroup size of 0. 
+ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out //==------- attributes.cpp - SYCL sub_group attributes test ----*- C++ -*---==// @@ -13,7 +20,7 @@ #define KERNEL_FUNCTOR_WITH_SIZE(SIZE) \ class KernelFunctor##SIZE { \ public: \ - [[intel::reqd_sub_group_size(SIZE)]] void \ + [[sycl::reqd_sub_group_size(SIZE)]] void \ operator()(sycl::nd_item<1> Item) const { \ const auto GID = Item.get_global_id(); \ } \ @@ -49,19 +56,6 @@ int main() { queue Queue; device Device = Queue.get_device(); - // According to specification, this kernel query requires `cl_khr_subgroups` - // or `cl_intel_subgroups`, and also `cl_intel_required_subgroup_size` - auto Vec = Device.get_info(); - if (std::find(Vec.begin(), Vec.end(), "cl_intel_subgroups") == - std::end(Vec) && - std::find(Vec.begin(), Vec.end(), "cl_khr_subgroups") == - std::end(Vec) || - std::find(Vec.begin(), Vec.end(), "cl_intel_required_subgroup_size") == - std::end(Vec)) { - std::cout << "Skipping test\n"; - return 0; - } - try { const auto SGSizes = Device.get_info(); diff --git a/sycl/test-e2e/SubGroup/helper.hpp b/sycl/test-e2e/SubGroup/helper.hpp index dfb47988d85e0..2a88bb0f5652e 100644 --- a/sycl/test-e2e/SubGroup/helper.hpp +++ b/sycl/test-e2e/SubGroup/helper.hpp @@ -164,24 +164,3 @@ void exit_if_not_equal_vec(vec val, vec ref, const char *name) { exit(1); } } - -bool core_sg_supported(const device &Device) { - auto Vec = Device.get_info(); - if (std::find(Vec.begin(), Vec.end(), "cl_khr_subgroups") != std::end(Vec)) - return true; - - if (std::find(Vec.begin(), Vec.end(), "cl_intel_subgroups") != std::end(Vec)) - return true; - - if (Device.get_backend() == sycl::backend::opencl) { - // Extract the numerical version from the version string, OpenCL version - // string have the format "OpenCL . ". 
- std::string ver = Device.get_info().substr(7, 3); - - // cl_khr_subgroups was core in OpenCL 2.1 and 2.2, but went back to - // optional in 3.0 - return ver >= "2.1" && ver < "3.0"; - } - - return false; -} diff --git a/sycl/test-e2e/SubGroup/info.cpp b/sycl/test-e2e/SubGroup/info.cpp index 83e9fdd5a64bf..51e5f760b8de0 100644 --- a/sycl/test-e2e/SubGroup/info.cpp +++ b/sycl/test-e2e/SubGroup/info.cpp @@ -1,3 +1,7 @@ +// UNSUPPORTED: accelerator +// TODO: FPGAs currently report supported subgroups as {4,8,16,32,64}, causing +// this test to fail. Additionally, the kernel max_sub_group_size checks +// crash on FPGAs // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -17,14 +21,20 @@ int main() { queue Queue; device Device = Queue.get_device(); - /* Basic sub-group functionality is supported as part of cl_khr_subgroups - * extension or as core OpenCL 2.1 feature. */ - if (!core_sg_supported(Device)) { - std::cout << "Skipping test\n"; - return 0; + bool old_opencl = false; + if (Device.get_backend() == sycl::backend::opencl) { + // Extract the numerical version from the version string, OpenCL version + // string have the format "OpenCL . ". + std::string ver = Device.get_info().substr(7, 3); + old_opencl = (ver < "2.1"); } + /* Check info::device parameters. */ - Device.get_info(); + if (!old_opencl) { + // Independent forward progress is missing on OpenCL backend prior to + // version 2.1 + Device.get_info(); + } Device.get_info(); try { @@ -49,30 +59,24 @@ int main() { }); uint32_t Res = 0; - /* sub_group_sizes can be queried only if cl_intel_required_subgroup_size - * extension is supported by device*/ - auto Vec = Device.get_info(); - if (std::find(Vec.begin(), Vec.end(), "cl_intel_required_subgroup_size") != - std::end(Vec)) { - auto sg_sizes = Device.get_info(); + auto sg_sizes = Device.get_info(); + + // Max sub-group size for a particular kernel might not be the max + // supported size on the device in general. 
Can only check that it is + // contained in list of valid sizes. + Res = Kernel.get_info( + Device); + bool Expected = + std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end(); + exit_if_not_equal(Expected, true, "max_sub_group_size"); - // Max sub-group size for a particular kernel might not be the max - // supported size on the device in general. Can only check that it is - // contained in list of valid sizes. + for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1), + range<3>(32, 3, 4), range<3>(7, 9, 11)}) { Res = Kernel.get_info( Device); - bool Expected = + Expected = std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end(); exit_if_not_equal(Expected, true, "max_sub_group_size"); - - for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1), - range<3>(32, 3, 4), range<3>(7, 9, 11)}) { - Res = Kernel.get_info( - Device); - Expected = - std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end(); - exit_if_not_equal(Expected, true, "max_sub_group_size"); - } } Res = Kernel.get_info( @@ -81,21 +85,11 @@ int main() { /* Sub-group size is not specified in kernel or IL*/ exit_if_not_equal(Res, 0, "compile_num_sub_groups"); - // According to specification, this kernel query requires `cl_khr_subgroups` - // or `cl_intel_subgroups` - if ((std::find(Vec.begin(), Vec.end(), "cl_khr_subgroups") != - std::end(Vec)) || - std::find(Vec.begin(), Vec.end(), "cl_intel_subgroups") != - std::end(Vec) && - std::find(Vec.begin(), Vec.end(), - "cl_intel_required_subgroup_size") != std::end(Vec)) { - Res = - Kernel.get_info( - Device); - - /* Required sub-group size is not specified in kernel or IL*/ - exit_if_not_equal(Res, 0, "compile_sub_group_size"); - } + Res = Kernel.get_info( + Device); + + /* Required sub-group size is not specified in kernel or IL*/ + exit_if_not_equal(Res, 0, "compile_sub_group_size"); } catch (exception e) { std::cout << "SYCL exception caught: " << e.what(); diff --git 
a/sycl/test-e2e/SubGroup/reduce.cpp b/sycl/test-e2e/SubGroup/reduce.cpp index 9422db73a599a..165556df711b1 100644 --- a/sycl/test-e2e/SubGroup/reduce.cpp +++ b/sycl/test-e2e/SubGroup/reduce.cpp @@ -13,10 +13,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); check(Queue); check(Queue); diff --git a/sycl/test-e2e/SubGroup/reduce_fp16.cpp b/sycl/test-e2e/SubGroup/reduce_fp16.cpp index 1140ab26677f9..ea40b0897e7c1 100644 --- a/sycl/test-e2e/SubGroup/reduce_fp16.cpp +++ b/sycl/test-e2e/SubGroup/reduce_fp16.cpp @@ -10,10 +10,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/reduce_fp64.cpp b/sycl/test-e2e/SubGroup/reduce_fp64.cpp index 0fd801deda7ec..f00b0000a6351 100644 --- a/sycl/test-e2e/SubGroup/reduce_fp64.cpp +++ b/sycl/test-e2e/SubGroup/reduce_fp64.cpp @@ -8,10 +8,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." 
<< std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/reduce_spirv13.cpp b/sycl/test-e2e/SubGroup/reduce_spirv13.cpp index 43fbdb3b215c5..aaee44963d54a 100644 --- a/sycl/test-e2e/SubGroup/reduce_spirv13.cpp +++ b/sycl/test-e2e/SubGroup/reduce_spirv13.cpp @@ -8,10 +8,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); check_mul(Queue); diff --git a/sycl/test-e2e/SubGroup/reduce_spirv13_fp16.cpp b/sycl/test-e2e/SubGroup/reduce_spirv13_fp16.cpp index d7e074551d9f9..d74fbd9a856a8 100644 --- a/sycl/test-e2e/SubGroup/reduce_spirv13_fp16.cpp +++ b/sycl/test-e2e/SubGroup/reduce_spirv13_fp16.cpp @@ -11,10 +11,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/reduce_spirv13_fp64.cpp b/sycl/test-e2e/SubGroup/reduce_spirv13_fp64.cpp index 6106ad2ba86a1..543dc32e24fc3 100644 --- a/sycl/test-e2e/SubGroup/reduce_spirv13_fp64.cpp +++ b/sycl/test-e2e/SubGroup/reduce_spirv13_fp64.cpp @@ -10,10 +10,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); std::cout << "Test passed." 
<< std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/scan.cpp b/sycl/test-e2e/SubGroup/scan.cpp index 989fcf7f588a7..74db27c7f5208 100644 --- a/sycl/test-e2e/SubGroup/scan.cpp +++ b/sycl/test-e2e/SubGroup/scan.cpp @@ -14,10 +14,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); check(Queue); check(Queue); diff --git a/sycl/test-e2e/SubGroup/scan_fp16.cpp b/sycl/test-e2e/SubGroup/scan_fp16.cpp index d49b960f4148d..a9d26f4a396cd 100644 --- a/sycl/test-e2e/SubGroup/scan_fp16.cpp +++ b/sycl/test-e2e/SubGroup/scan_fp16.cpp @@ -11,10 +11,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/scan_fp64.cpp b/sycl/test-e2e/SubGroup/scan_fp64.cpp index 6e32d278386ab..abd29566e9d77 100644 --- a/sycl/test-e2e/SubGroup/scan_fp64.cpp +++ b/sycl/test-e2e/SubGroup/scan_fp64.cpp @@ -9,10 +9,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." 
<< std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/scan_spirv13.cpp b/sycl/test-e2e/SubGroup/scan_spirv13.cpp index 0966161844d1f..774283d0d6e13 100644 --- a/sycl/test-e2e/SubGroup/scan_spirv13.cpp +++ b/sycl/test-e2e/SubGroup/scan_spirv13.cpp @@ -9,10 +9,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); check_mul(Queue); check_mul(Queue); diff --git a/sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp b/sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp index 9a213bd885b1b..136c3210d122a 100644 --- a/sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp +++ b/sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp @@ -12,10 +12,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp b/sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp index f78a86c9a8e2d..8ea36613ed613 100644 --- a/sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp +++ b/sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp @@ -11,10 +11,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." 
<< std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/vote.cpp b/sycl/test-e2e/SubGroup/vote.cpp index d835e2d2d9ec1..5fb9ac0066194 100644 --- a/sycl/test-e2e/SubGroup/vote.cpp +++ b/sycl/test-e2e/SubGroup/vote.cpp @@ -69,10 +69,6 @@ void check(queue Queue, const int G, const int L, const int D, const int R) { } int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue, 240, 80, 3, 1); check(Queue, 24, 12, 3, 4); check(Queue, 1024, 256, 3, 1); From bf7c84c38a6f20f2abb10f12f19d627967ee7cd2 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Sat, 29 Jun 2024 01:20:54 +0200 Subject: [PATCH 23/40] [SYCL] Simplify storePlainArg to avoid alias violations (#14344) The helper function storePlainArg in handler and jit_compiler reinterpret-casts pointers to memory in vectors of char in order to store arguments in them. However, this violates strict aliasing and is unnecessary as the resulting pointers are immediately converted to void* after all calls to the function. As such, this patch simplifies these implementations to always return void* and use memcpy to avoid the alias violation. Signed-off-by: Larsen, Steffen --- sycl/include/sycl/handler.hpp | 12 +++++------- sycl/source/detail/jit_compiler.cpp | 18 +++++++----------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index a71f5400a813d..2b313c8834443 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -489,12 +489,10 @@ class __SYCL_EXPORT handler { handler(std::shared_ptr Graph); /// Stores copy of Arg passed to the CGData.MArgsStorage.
- template >> - F *storePlainArg(T &&Arg) { + template void *storePlainArg(T &&Arg) { CGData.MArgsStorage.emplace_back(sizeof(T)); - auto Storage = reinterpret_cast(CGData.MArgsStorage.back().data()); - *Storage = Arg; + void *Storage = static_cast(CGData.MArgsStorage.back().data()); + std::memcpy(Storage, &Arg, sizeof(T)); return Storage; } @@ -691,7 +689,7 @@ class __SYCL_EXPORT handler { } template void setArgHelper(int ArgIndex, T &&Arg) { - auto StoredArg = static_cast(storePlainArg(Arg)); + void *StoredArg = storePlainArg(Arg); if (!std::is_same::value && std::is_pointer::value) { MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg, @@ -703,7 +701,7 @@ class __SYCL_EXPORT handler { } void setArgHelper(int ArgIndex, sampler &&Arg) { - auto StoredArg = static_cast(storePlainArg(Arg)); + void *StoredArg = storePlainArg(Arg); MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg, sizeof(sampler), ArgIndex); } diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index e849fb3b57ad0..952482e42d79f 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -468,15 +468,6 @@ detectIdenticalParameter(std::vector &Params, ArgDesc Arg) { return Params.end(); } -template >> -F *storePlainArg(std::vector> &ArgStorage, T &&Arg) { - ArgStorage.emplace_back(sizeof(T)); - auto Storage = reinterpret_cast(ArgStorage.back().data()); - *Storage = Arg; - return Storage; -} - void *storePlainArgRaw(std::vector> &ArgStorage, void *ArgPtr, size_t ArgSize) { ArgStorage.emplace_back(ArgSize); @@ -485,6 +476,11 @@ void *storePlainArgRaw(std::vector> &ArgStorage, void *ArgPtr, return Storage; } +template +void *storePlainArg(std::vector> &ArgStorage, T &&Arg) { + return storePlainArgRaw(ArgStorage, &Arg, sizeof(T)); +} + static ParamIterator preProcessArguments( std::vector> &ArgStorage, ParamIterator Arg, PromotionMap &PromotedAccs, @@ -648,10 +644,10 @@ updatePromotedArgs(const 
::jit_compiler::SYCLKernelInfo &FusedKernelInfo, Req, Promotion::Local) : 0; range<3> AccessRange{1, 1, LocalSize}; - auto *RangeArg = storePlainArg(FusedArgStorage, AccessRange); + void *RangeArg = storePlainArg(FusedArgStorage, AccessRange); // Use all-zero as the offset id<3> AcessOffset{0, 0, 0}; - auto *OffsetArg = storePlainArg(FusedArgStorage, AcessOffset); + void *OffsetArg = storePlainArg(FusedArgStorage, AcessOffset); // Override the arguments. // 1. Override the pointer with a std-layout argument with 'nullptr' as From c3402c93a71a9406e568e7bb39ea173c968194f4 Mon Sep 17 00:00:00 2001 From: smanna12 Date: Sat, 29 Jun 2024 12:37:31 -0700 Subject: [PATCH 24/40] [SYCL][FPGA] Fix the way we handle duplicate vs conflicting values with loop attributes (#14342) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch improves diagnostic support by fixing bugs in the way we handle duplicate vs conflicting values with the following SYCL FPGA loop attributes: [[intel::max_reinvocation_delay()]] [[intel::initiation_interval()]] [[intel::max_concurrency()]] [[intel::speculated_iterations()]] [[intel::max_interleaving()]] The patch addresses issues in the test case below, which previously missed diagnostics due to an early return from the while loop that checks for duplicate versus conflicting attribute values in the routine CheckForDuplicateAttrs().
Example with `speculated_iterations' attribute:    Before the fix:     [[intel::speculated_iterations(1)]] // expected-note {{previous attribute is here}}     [[intel::speculated_iterations(1)]] // OK     [[intel::speculated_iterations(2)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}}     [[intel::speculated_iterations(4)]] // OK     for (int i = 0; i != 10; ++i) { a[i] = 0; }     After the fix:     [[intel::speculated_iterations(1)]] // expected-note 2 {{previous attribute is here}}     [[intel::speculated_iterations(1)]] // OK     [[intel::speculated_iterations(2)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}}     [[intel::speculated_iterations(4)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}}     for (int i = 0; i != 10; ++i) { a[i] = 0; } Signed-off-by: Soumi Manna --------- Signed-off-by: Soumi Manna --- clang/lib/Sema/SemaStmtAttr.cpp | 1 - clang/test/SemaSYCL/intel-fpga-loops.cpp | 77 ++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index a7a7347cfcbab..dcee4506e99f1 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -1025,7 +1025,6 @@ static void CheckForDuplicateAttrs(Sema &S, ArrayRef Attrs) { S.Diag((*LastFoundItr)->getLocation(), diag::err_loop_attr_conflict) << *FirstItr; S.Diag((*FirstItr)->getLocation(), diag::note_previous_attribute); - return; } } } diff --git a/clang/test/SemaSYCL/intel-fpga-loops.cpp b/clang/test/SemaSYCL/intel-fpga-loops.cpp index 2b1a6d5043c1a..53d3e61f69c57 100644 --- a/clang/test/SemaSYCL/intel-fpga-loops.cpp +++ b/clang/test/SemaSYCL/intel-fpga-loops.cpp @@ -311,6 +311,30 @@ void zoo() { [[intel::speculated_iterations(2)]] for (int i = 0; i != 10; ++i) a[i] = 0; + [[intel::speculated_iterations(1)]] // expected-note 2{{previous attribute is here}} + [[intel::speculated_iterations(1)]] // OK + 
[[intel::speculated_iterations(2)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}} + [[intel::speculated_iterations(4)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + + [[intel::max_interleaving(0)]] // expected-note 2{{previous attribute is here}} + [[intel::max_interleaving(0)]] // OK + [[intel::max_interleaving(1)]] // expected-error {{conflicting loop attribute 'max_interleaving'}} + [[intel::max_interleaving(1)]] // expected-error {{conflicting loop attribute 'max_interleaving'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + + [[intel::max_concurrency(10)]] // expected-note 2{{previous attribute is here}} + [[intel::max_concurrency(10)]] // OK + [[intel::max_concurrency(20)]] // expected-error {{conflicting loop attribute 'max_concurrency'}} + [[intel::max_concurrency(40)]] // expected-error {{conflicting loop attribute 'max_concurrency'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + + [[intel::initiation_interval(10)]] // expected-note 2{{previous attribute is here}} + [[intel::initiation_interval(10)]] // OK + [[intel::initiation_interval(20)]] // expected-error {{conflicting loop attribute 'initiation_interval'}} + [[intel::initiation_interval(40)]] // expected-error {{conflicting loop attribute 'initiation_interval'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + [[intel::ivdep]] // expected-warning@+2 {{ignoring redundant Intel FPGA loop attribute 'ivdep': safelen INF >= safelen INF}} // expected-note@-2 {{previous attribute is here}} @@ -383,6 +407,12 @@ void zoo() { [[intel::max_reinvocation_delay(20)]] for (int i = 0; i != 10; ++i) a[i] = 0; + [[intel::max_reinvocation_delay(10)]] // expected-note 2{{previous attribute is here}} + [[intel::max_reinvocation_delay(10)]] // OK + [[intel::max_reinvocation_delay(20)]] // expected-error {{conflicting loop attribute 'max_reinvocation_delay'}} + [[intel::max_reinvocation_delay(40)]] // expected-error 
{{conflicting loop attribute 'max_reinvocation_delay'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + [[intel::enable_loop_pipelining]] // expected-error@+1 {{duplicate Intel FPGA loop attribute 'enable_loop_pipelining'}} [[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i) @@ -476,7 +506,7 @@ void ivdep_dependent() { }; } -template +template void ii_dependent() { int a[10]; // expected-error@+1 {{'initiation_interval' attribute requires a positive integral compile time constant expression}} @@ -491,6 +521,13 @@ void ii_dependent() { [[intel::initiation_interval(A)]] // expected-note {{previous attribute is here}} [[intel::initiation_interval(B)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::initiation_interval(A)]] // expected-note 2{{previous attribute is here}} + [[intel::initiation_interval(A)]] // OK + [[intel::initiation_interval(B)]] // expected-error {{conflicting loop attribute 'initiation_interval'}} + [[intel::initiation_interval(D)]] // expected-error {{conflicting loop attribute 'initiation_interval'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } template @@ -515,6 +552,13 @@ void max_concurrency_dependent() { // max_concurrency attribute accepts value 0. 
[[intel::max_concurrency(D)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::max_concurrency(D)]] // expected-note 2{{previous attribute is here}} + [[intel::max_concurrency(D)]] // OK + [[intel::max_concurrency(A)]] // expected-error {{conflicting loop attribute 'max_concurrency'}} + [[intel::max_concurrency(B)]] // expected-error {{conflicting loop attribute 'max_concurrency'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } template @@ -540,9 +584,16 @@ void max_interleaving_dependent() { [[intel::max_interleaving(D)]] [[intel::max_interleaving(D)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::max_interleaving(D)]] // expected-note 2{{previous attribute is here}} + [[intel::max_interleaving(D)]] // OK + [[intel::max_interleaving(C)]] // expected-error {{conflicting loop attribute 'max_interleaving'}} + [[intel::max_interleaving(C)]] // expected-error {{conflicting loop attribute 'max_interleaving'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } -template +template void speculated_iterations_dependent() { int a[10]; // expected-error@+1 {{'speculated_iterations' attribute requires a non-negative integral compile time constant expression}} @@ -561,6 +612,13 @@ void speculated_iterations_dependent() { [[intel::speculated_iterations(B)]] [[intel::speculated_iterations(B)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::speculated_iterations(A)]] // expected-note 2{{previous attribute is here}} + [[intel::speculated_iterations(A)]] // OK + [[intel::speculated_iterations(B)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}} + [[intel::speculated_iterations(E)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } template @@ -624,7 +682,7 @@ void loop_count_control_dependent() { a[i] = 0; } -template +template void max_reinvocation_delay_dependent() { int a[10]; // expected-error@+1 {{'max_reinvocation_delay' attribute requires a positive integral 
compile time constant expression}} @@ -639,6 +697,13 @@ void max_reinvocation_delay_dependent() { [[intel::max_reinvocation_delay(A)]] [[intel::max_reinvocation_delay(A)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::max_reinvocation_delay(A)]] // expected-note 2{{previous attribute is here}} + [[intel::max_reinvocation_delay(A)]] // OK + [[intel::max_reinvocation_delay(B)]] // expected-error {{conflicting loop attribute 'max_reinvocation_delay'}} + [[intel::max_reinvocation_delay(D)]] // expected-error {{conflicting loop attribute 'max_reinvocation_delay'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } void check_max_concurrency_expression() { @@ -815,14 +880,14 @@ int main() { //expected-note@-1 +{{in instantiation of function template specialization}} ivdep_dependent<2, 4, -1>(); //expected-note@-1 +{{in instantiation of function template specialization}} - ii_dependent<2, 4, -1>(); + ii_dependent<2, 4, -1, 8>(); //expected-note@-1 +{{in instantiation of function template specialization}} max_concurrency_dependent<1, 4, -2, 0>(); // expected-note{{in instantiation of function template specialization 'max_concurrency_dependent<1, 4, -2, 0>' requested here}} max_interleaving_dependent<-1, 4, 0, 1>(); // expected-note{{in instantiation of function template specialization 'max_interleaving_dependent<-1, 4, 0, 1>' requested here}} - speculated_iterations_dependent<1, 8, -3, 0>(); // expected-note{{in instantiation of function template specialization 'speculated_iterations_dependent<1, 8, -3, 0>' requested here}} + speculated_iterations_dependent<1, 8, -3, 0, 16>(); // expected-note{{in instantiation of function template specialization 'speculated_iterations_dependent<1, 8, -3, 0, 16>' requested here}} loop_coalesce_dependent<-1, 4, 0>(); // expected-note{{in instantiation of function template specialization 'loop_coalesce_dependent<-1, 4, 0>' requested here}} loop_count_control_dependent<3, 2, -1>(); // expected-note{{in instantiation of function template 
specialization 'loop_count_control_dependent<3, 2, -1>' requested here}} - max_reinvocation_delay_dependent<1, 3, 0>(); // expected-note{{in instantiation of function template specialization 'max_reinvocation_delay_dependent<1, 3, 0>' requested here}} + max_reinvocation_delay_dependent<1, 3, 0, 6>(); // expected-note{{in instantiation of function template specialization 'max_reinvocation_delay_dependent<1, 3, 0, 6>' requested here}} check_max_concurrency_expression(); check_max_interleaving_expression(); check_speculated_iterations_expression(); From 860aa1a708a6b6b360c2e4b1593d58b8e0495d02 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Sun, 30 Jun 2024 21:52:20 -0700 Subject: [PATCH 25/40] [SYCL] Use SYCL 2020 exception in `DispatchHostTask::operator()` (#14356) Part of getting rid of the deprecated `exception::get_cl_code`. --- sycl/source/detail/scheduler/commands.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index b71ce3d41882e..78c52327ff289 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -321,7 +321,7 @@ class DispatchHostTask { ExecCGCommand *MThisCmd; std::vector MReqToMem; - pi_result waitForEvents() const { + bool waitForEvents() const { std::map> RequiredEventsPerPlugin; @@ -343,14 +343,14 @@ class DispatchHostTask { try { PluginWithEvents.first->call(RawEvents.size(), RawEvents.data()); - } catch (const sycl::exception &E) { + } catch (const sycl::exception &) { CGHostTask &HostTask = static_cast(MThisCmd->getCG()); HostTask.MQueue->reportAsyncException(std::current_exception()); - return (pi_result)E.get_cl_code(); + return false; } catch (...) 
{ CGHostTask &HostTask = static_cast(MThisCmd->getCG()); HostTask.MQueue->reportAsyncException(std::current_exception()); - return PI_ERROR_UNKNOWN; + return false; } } @@ -360,7 +360,7 @@ class DispatchHostTask { Event->waitInternal(); } - return PI_SUCCESS; + return true; } public: @@ -385,11 +385,10 @@ class DispatchHostTask { } #endif - pi_result WaitResult = waitForEvents(); - if (WaitResult != PI_SUCCESS) { - std::exception_ptr EPtr = std::make_exception_ptr(sycl::runtime_error( - std::string("Couldn't wait for host-task's dependencies"), - WaitResult)); + if (!waitForEvents()) { + std::exception_ptr EPtr = std::make_exception_ptr(sycl::exception( + make_error_code(errc::runtime), + std::string("Couldn't wait for host-task's dependencies"))); HostTask.MQueue->reportAsyncException(EPtr); // reset host-task's lambda and quit HostTask.MHostTask.reset(); From 1e0ef19f6d0fb472bc19afe1e063bf15a393e5b8 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Sun, 30 Jun 2024 21:52:54 -0700 Subject: [PATCH 26/40] [SYCL] Remove `__SYCL_EXPORT` from headers under `sycl/source` (#13733) Those are implementation details and shouldn't be exposed through ABI. 
--- sycl/source/detail/accessor_impl.hpp | 4 +- sycl/source/detail/buffer_impl.hpp | 2 +- sycl/source/detail/image_impl.hpp | 15 +- .../kernel_compiler_opencl.hpp | 1 - sycl/source/detail/memory_manager.hpp | 2 +- sycl/source/detail/sampler_impl.hpp | 2 +- sycl/source/detail/stream_impl.hpp | 2 +- sycl/source/detail/sycl_mem_obj_t.hpp | 2 +- sycl/test/abi/sycl_symbols_linux.dump | 92 --------- sycl/test/abi/sycl_symbols_windows.dump | 177 ------------------ 10 files changed, 14 insertions(+), 285 deletions(-) diff --git a/sycl/source/detail/accessor_impl.hpp b/sycl/source/detail/accessor_impl.hpp index b564f28d7f334..377dca7cc90ee 100644 --- a/sycl/source/detail/accessor_impl.hpp +++ b/sycl/source/detail/accessor_impl.hpp @@ -39,7 +39,7 @@ class SYCLMemObjI; class Command; -class __SYCL_EXPORT AccessorImplHost { +class AccessorImplHost { public: AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, void *SYCLMemObject, int Dims, @@ -127,7 +127,7 @@ class __SYCL_EXPORT AccessorImplHost { using AccessorImplPtr = std::shared_ptr; -class __SYCL_EXPORT LocalAccessorImplHost { +class LocalAccessorImplHost { public: // Allocate ElemSize more data to have sufficient padding to enforce // alignment. 
diff --git a/sycl/source/detail/buffer_impl.hpp b/sycl/source/detail/buffer_impl.hpp index 9389b760e7737..8408cb0da38aa 100644 --- a/sycl/source/detail/buffer_impl.hpp +++ b/sycl/source/detail/buffer_impl.hpp @@ -38,7 +38,7 @@ class host_accessor; namespace detail { -class __SYCL_EXPORT buffer_impl final : public SYCLMemObjT { +class buffer_impl final : public SYCLMemObjT { using BaseT = SYCLMemObjT; using typename BaseT::MemObjType; diff --git a/sycl/source/detail/image_impl.hpp b/sycl/source/detail/image_impl.hpp index 386a4636b5fe5..d526ce19281a4 100644 --- a/sycl/source/detail/image_impl.hpp +++ b/sycl/source/detail/image_impl.hpp @@ -40,25 +40,24 @@ namespace detail { using image_allocator = aligned_allocator; // utility function: Returns the Number of Channels for a given Order. -__SYCL_EXPORT uint8_t getImageNumberChannels(image_channel_order Order); +uint8_t getImageNumberChannels(image_channel_order Order); // utility function: Returns the number of bytes per image element -__SYCL_EXPORT uint8_t getImageElementSize(uint8_t NumChannels, - image_channel_type Type); +uint8_t getImageElementSize(uint8_t NumChannels, image_channel_type Type); -__SYCL_EXPORT sycl::detail::pi::PiMemImageChannelOrder +sycl::detail::pi::PiMemImageChannelOrder convertChannelOrder(image_channel_order Order); -__SYCL_EXPORT image_channel_order +image_channel_order convertChannelOrder(sycl::detail::pi::PiMemImageChannelOrder Order); -__SYCL_EXPORT sycl::detail::pi::PiMemImageChannelType +sycl::detail::pi::PiMemImageChannelType convertChannelType(image_channel_type Type); -__SYCL_EXPORT image_channel_type +image_channel_type convertChannelType(sycl::detail::pi::PiMemImageChannelType Type); -class __SYCL_EXPORT image_impl final : public SYCLMemObjT { +class image_impl final : public SYCLMemObjT { using BaseT = SYCLMemObjT; using typename BaseT::MemObjType; diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp 
b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp index f457256884cda..d618c86e07d97 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp @@ -9,7 +9,6 @@ #pragma once #include -#include // for __SYCL_EXPORT #include #include diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 805673d40d359..06ba2e2a25313 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -33,7 +33,7 @@ using ContextImplPtr = std::shared_ptr; // The class contains methods that work with memory. All operations with // device memory should go through MemoryManager. -class __SYCL_EXPORT MemoryManager { +class MemoryManager { public: // The following method releases memory allocation of memory object. // Depending on the context it releases memory on host or on device. diff --git a/sycl/source/detail/sampler_impl.hpp b/sycl/source/detail/sampler_impl.hpp index 7b3878e149ff9..4ae318c29ffbe 100644 --- a/sycl/source/detail/sampler_impl.hpp +++ b/sycl/source/detail/sampler_impl.hpp @@ -25,7 +25,7 @@ enum class filtering_mode : unsigned int; enum class coordinate_normalization_mode : unsigned int; namespace detail { -class __SYCL_EXPORT sampler_impl { +class sampler_impl { public: sampler_impl(coordinate_normalization_mode normalizationMode, addressing_mode addressingMode, filtering_mode filteringMode, diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index ab9565e0d1641..1578a8d7cb508 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -22,7 +22,7 @@ namespace sycl { inline namespace _V1 { namespace detail { -class __SYCL_EXPORT stream_impl { +class stream_impl { public: stream_impl(size_t BufferSize, size_t MaxStatementSize, const property_list &PropList); diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp 
b/sycl/source/detail/sycl_mem_obj_t.hpp index f67453d8ac221..d86468f4dc497 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -37,7 +37,7 @@ using ContextImplPtr = std::shared_ptr; using EventImplPtr = std::shared_ptr; // The class serves as a base for all SYCL memory objects. -class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { +class SYCLMemObjT : public SYCLMemObjI { // The check for output iterator is commented out as it blocks set_final_data // with void * argument to be used. diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index a852910bd4431..6391a69978a56 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3154,37 +3154,10 @@ _ZN4sycl3_V16detail10build_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERK _ZN4sycl3_V16detail10frexp_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail10frexp_implEdPi _ZN4sycl3_V16detail10frexp_implEfPi -_ZN4sycl3_V16detail10image_impl10getDevicesESt10shared_ptrINS1_12context_implEE -_ZN4sycl3_V16detail10image_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event -_ZN4sycl3_V16detail10image_impl14checkImageDescERK14_pi_image_descSt10shared_ptrINS1_12context_implEEPv -_ZN4sycl3_V16detail10image_impl16checkImageFormatERK16_pi_image_formatSt10shared_ptrINS1_12context_implEE -_ZN4sycl3_V16detail10image_impl34sampledImageDestructorNotificationEPv -_ZN4sycl3_V16detail10image_impl35sampledImageConstructorNotificationERKNS1_13code_locationEPvPKvjPmNS0_12image_formatERKNS0_13image_samplerE -_ZN4sycl3_V16detail10image_impl36unsampledImageDestructorNotificationEPv -_ZN4sycl3_V16detail10image_impl37unsampledImageConstructorNotificationERKNS1_13code_locationEPvPKvjPmNS0_12image_formatE -_ZN4sycl3_V16detail10image_implC1EP7_cl_memRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EEh 
-_ZN4sycl3_V16detail10image_implC1EmRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EEhNS0_19image_channel_orderENS0_18image_channel_typeEbNS0_5rangeILi3EEE -_ZN4sycl3_V16detail10image_implC2EP7_cl_memRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EEh -_ZN4sycl3_V16detail10image_implC2EmRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EEhNS0_19image_channel_orderENS0_18image_channel_typeEbNS0_5rangeILi3EEE _ZN4sycl3_V16detail10make_eventEmRKNS0_7contextENS0_7backendE _ZN4sycl3_V16detail10make_eventEmRKNS0_7contextEbNS0_7backendE _ZN4sycl3_V16detail10make_queueEmiRKNS0_7contextEPKNS0_6deviceEbRKNS0_13property_listERKSt8functionIFvNS0_14exception_listEEENS0_7backendE _ZN4sycl3_V16detail10waitEventsESt6vectorINS0_5eventESaIS3_EE -_ZN4sycl3_V16detail11SYCLMemObjT10releaseMemESt10shared_ptrINS1_12context_implEEPv -_ZN4sycl3_V16detail11SYCLMemObjT16determineHostPtrERKSt10shared_ptrINS1_12context_implEEbRPvRb -_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEPv -_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEv -_ZN4sycl3_V16detail11SYCLMemObjT20getBufSizeForContextERKSt10shared_ptrINS1_12context_implEEm -_ZN4sycl3_V16detail11SYCLMemObjT27handleWriteAccessorCreationEv -_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm -_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE 
-_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm -_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11buffer_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event -_ZN4sycl3_V16detail11buffer_impl22destructorNotificationEPv -_ZN4sycl3_V16detail11buffer_impl23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm _ZN4sycl3_V16detail11image_plain14set_write_backEb _ZN4sycl3_V16detail11image_plain23set_final_data_internalERKSt8functionIFvRKS3_IFvPvEEEE _ZN4sycl3_V16detail11image_plain23set_final_data_internalEv @@ -3227,13 +3200,6 @@ _ZN4sycl3_V16detail11remquo_implEffPi _ZN4sycl3_V16detail11sincos_implENS1_9half_impl4halfEPS3_ _ZN4sycl3_V16detail11sincos_implEdPd _ZN4sycl3_V16detail11sincos_implEfPf -_ZN4sycl3_V16detail11stream_impl14initStreamHostESt10shared_ptrINS1_10queue_implEE -_ZN4sycl3_V16detail11stream_impl15accessGlobalBufERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl18accessGlobalOffsetERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl5flushERKSt10shared_ptrINS1_10event_implEE -_ZN4sycl3_V16detail11stream_implC1EmmRKNS0_13property_listE -_ZN4sycl3_V16detail11stream_implC2EmmRKNS0_13property_listE _ZN4sycl3_V16detail12buffer_plain14deleteAccPropsERKNS1_16PropWithDataKindE _ZN4sycl3_V16detail12buffer_plain14set_write_backEb _ZN4sycl3_V16detail12buffer_plain23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm @@ -3255,45 +3221,6 @@ _ZN4sycl3_V16detail12buffer_plainC2EmmRKNS0_13property_listESt10unique_ptrINS1_1 _ZN4sycl3_V16detail12compile_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERKSt6vectorINS0_6deviceESaIS8_EERKNS0_13property_listE 
_ZN4sycl3_V16detail12isOutOfRangeENS0_3vecIiLi4EEENS0_15addressing_modeENS0_5rangeILi3EEE _ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendEbRKSt6vectorINS0_6deviceESaISA_EE -_ZN4sycl3_V16detail12sampler_impl18getOrCreateSamplerERKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE -_ZN4sycl3_V16detail12sampler_implC1EP11_cl_samplerRKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implC2ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE -_ZN4sycl3_V16detail12sampler_implC2EP11_cl_samplerRKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implD1Ev -_ZN4sycl3_V16detail12sampler_implD2Ev -_ZN4sycl3_V16detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager11copy_2d_usmEPKvmSt10shared_ptrINS1_10queue_implEEPvmmmSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager11fill_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager13memset_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmcSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_ -_ZN4sycl3_V16detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event 
-_ZN4sycl3_V16detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager18allocateHostMemoryEPNS1_11SYCLMemObjIEPvbmRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager19copy_image_bindlessEPvSt10shared_ptrINS1_10queue_implEES3_RK14_pi_image_descRK16_pi_image_format20_pi_image_copy_flags22pi_image_offset_structSE_22pi_image_region_structSF_RKSt6vectorIP9_pi_eventSaISI_EEPSI_ -_ZN4sycl3_V16detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager20allocateMemSubBufferESt10shared_ptrINS1_12context_implEEPvmmNS0_5rangeILi3EEESt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager21copy_to_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmS4_RKSt6vectorIP9_pi_eventSaISA_EEPSA_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager23copy_from_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmPvRKSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager26ext_oneapi_fill_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvmPKcjNS0_5rangeILi3EEESE_NS0_2idILi3EEEjSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSA_jSC_SC_SE_jSt6vectorIjSaIjEEPj 
-_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjPcjSC_SE_jSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmiSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ -_ZN4sycl3_V16detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_RKS6_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_RKS6_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager5unmapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEES5_St6vectorIP9_pi_eventSaISB_EERSB_ -_ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event 
-_ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc _ZN4sycl3_V16detail13lgamma_r_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail13lgamma_r_implEdPi @@ -3320,23 +3247,15 @@ _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6a _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibmbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE -_ZN4sycl3_V16detail16AccessorImplHost6resizeEm -_ZN4sycl3_V16detail16AccessorImplHostD1Ev -_ZN4sycl3_V16detail16AccessorImplHostD2Ev _ZN4sycl3_V16detail16reduGetMaxWGSizeESt10shared_ptrINS1_10queue_implEEm _ZN4sycl3_V16detail17HostProfilingInfo3endEv _ZN4sycl3_V16detail17HostProfilingInfo5startEv _ZN4sycl3_V16detail17device_global_map3addEPKvPKc _ZN4sycl3_V16detail17reduComputeWGSizeEmmRm -_ZN4sycl3_V16detail18convertChannelTypeE22_pi_image_channel_type -_ZN4sycl3_V16detail18convertChannelTypeENS0_18image_channel_typeE _ZN4sycl3_V16detail18get_kernel_id_implENS1_11string_viewE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextENS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18stringifyErrorCodeEi -_ZN4sycl3_V16detail19convertChannelOrderE23_pi_image_channel_order -_ZN4sycl3_V16detail19convertChannelOrderENS0_19image_channel_orderE 
-_ZN4sycl3_V16detail19getImageElementSizeEhNS0_18image_channel_typeE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm @@ -3352,7 +3271,6 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv _ZN4sycl3_V16detail21LocalAccessorBaseHostC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE -_ZN4sycl3_V16detail22getImageNumberChannelsENS0_19image_channel_orderE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE @@ -3742,11 +3660,6 @@ _ZNK4sycl3_V15queue9getNativeERi _ZNK4sycl3_V16ONEAPI15filter_selector13select_deviceEv _ZNK4sycl3_V16ONEAPI15filter_selector5resetEv _ZNK4sycl3_V16ONEAPI15filter_selectorclERKNS0_6deviceE -_ZNK4sycl3_V16detail11SYCLMemObjT18detachMemoryObjectERKSt10shared_ptrIS2_E -_ZNK4sycl3_V16detail11SYCLMemObjT9getPluginEv -_ZNK4sycl3_V16detail11SYCLMemObjT9isInteropEv -_ZNK4sycl3_V16detail11buffer_impl15getNativeVectorENS0_7backendE -_ZNK4sycl3_V16detail11buffer_impl16addInteropObjectERSt6vectorImSaImEE _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device10gpu_slicesEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11free_memoryEEENT_11return_typeEv 
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11pci_addressEEENT_11return_typeEv @@ -3827,8 +3740,6 @@ _ZNK4sycl3_V16detail11image_plain8get_sizeEv _ZNK4sycl3_V16detail11image_plain9get_countEv _ZNK4sycl3_V16detail11image_plain9get_pitchEv _ZNK4sycl3_V16detail11image_plain9get_rangeEv -_ZNK4sycl3_V16detail11stream_impl25get_work_item_buffer_sizeEv -_ZNK4sycl3_V16detail11stream_impl4sizeEv _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -3867,9 +3778,6 @@ _ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv -_ZNK4sycl3_V16detail12sampler_impl18get_filtering_modeEv -_ZNK4sycl3_V16detail12sampler_impl19get_addressing_modeEv -_ZNK4sycl3_V16detail12sampler_impl33get_coordinate_normalization_modeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getElemSizeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail16AccessorBaseHost13isPlaceholderEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index c0d564ebe65e4..bcfdab110778d 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -466,24 +466,12 @@ ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_K_NAEBVproperty_list@23@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_N_K4AEBVproperty_list@23@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z 
-??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_K_NAEBVproperty_list@23@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_N_K4AEBVproperty_list@23@@Z ??0HostProfilingInfo@detail@_V1@sycl@@QEAA@XZ ??0LocalAccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VLocalAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z ??0LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@V?$range@$02@23@HHAEBVproperty_list@23@@Z -??0LocalAccessorImplHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z -??0LocalAccessorImplHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0LocalAccessorImplHost@detail@_V1@sycl@@QEAA@V?$range@$02@23@HHAEBVproperty_list@23@@Z ??0SYCLCategory@detail@_V1@sycl@@QEAA@XZ -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@Vevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@_KVevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@_NVevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@_NVevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@W4_pi_image_channel_order@@W4_pi_image_channel_type@@V?$range@$02@23@I0@Z 
-??0SYCLMemObjT@detail@_V1@sycl@@QEAA@_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VSampledImageAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z @@ -495,13 +483,6 @@ ??0accelerator_selector@_V1@sycl@@QEAA@$$QEAV012@@Z ??0accelerator_selector@_V1@sycl@@QEAA@AEBV012@@Z ??0accelerator_selector@_V1@sycl@@QEAA@XZ -??0buffer_impl@detail@_V1@sycl@@QEAA@AEBV?$function@$$A6AXPEAX@Z@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@Vevent@23@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@PEAX_K1AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@PEBX_K1AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@_K0AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@_NVevent@23@@Z 
??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$function@$$A6AXPEAX@Z@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z ??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@$$CBX@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z ??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z @@ -593,19 +574,6 @@ ??0host_selector@_V1@sycl@@QEAA@$$QEAV012@@Z ??0host_selector@_V1@sycl@@QEAA@AEBV012@@Z ??0host_selector@_V1@sycl@@QEAA@XZ -??0image_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@EAEBVproperty_list@23@_N@Z -??0image_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@EAEBVproperty_list@23@_N@Z -??0image_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@W4image_channel_order@23@W4image_channel_type@23@Uimage_sampler@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@W4image_channel_order@23@W4image_channel_type@23@Uimage_sampler@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@EAEBVproperty_list@23@@Z 
-??0image_impl@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@Vevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@E@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEAXW4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEAXW4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEBXW4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEBXW4image_channel_order@23@W4image_channel_type@23@Uimage_sampler@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEBXW4image_channel_order@23@W4image_channel_type@23@Uimage_sampler@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z 
-??0image_impl@detail@_V1@sycl@@QEAA@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@Vevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EW4image_channel_order@23@W4image_channel_type@23@_NV?$range@$02@23@@Z ??0image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z ??0image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ??0image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBUimage_descriptor@12345@AEBVqueue@45@@Z @@ -672,28 +640,19 @@ ??0sampler@_V1@sycl@@QEAA@AEBV012@@Z ??0sampler@_V1@sycl@@QEAA@PEAU_cl_sampler@@AEBVcontext@12@@Z ??0sampler@_V1@sycl@@QEAA@W4coordinate_normalization_mode@12@W4addressing_mode@12@W4filtering_mode@12@AEBVproperty_list@12@@Z -??0sampler_impl@detail@_V1@sycl@@QEAA@PEAU_cl_sampler@@AEBVcontext@23@@Z -??0sampler_impl@detail@_V1@sycl@@QEAA@W4coordinate_normalization_mode@23@W4addressing_mode@23@W4filtering_mode@23@AEBVproperty_list@23@@Z ??0stream@_V1@sycl@@AEAA@V?$shared_ptr@Vstream_impl@detail@_V1@sycl@@@std@@V?$accessor@D$00$0EAC@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@12@V?$accessor@I$00$0EAF@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@12@1@Z ??0stream@_V1@sycl@@QEAA@$$QEAV012@@Z ??0stream@_V1@sycl@@QEAA@AEBV012@@Z ??0stream@_V1@sycl@@QEAA@_K0AEAVhandler@12@@Z ??0stream@_V1@sycl@@QEAA@_K0AEAVhandler@12@AEBVproperty_list@12@@Z -??0stream_impl@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z -??0stream_impl@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0stream_impl@detail@_V1@sycl@@QEAA@_K0AEBVproperty_list@23@@Z ??0tls_code_loc_t@detail@_V1@sycl@@QEAA@AEBUcode_location@123@@Z ??0tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ ??1AccessorBaseHost@detail@_V1@sycl@@QEAA@XZ 
-??1AccessorImplHost@detail@_V1@sycl@@QEAA@XZ ??1LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ -??1LocalAccessorImplHost@detail@_V1@sycl@@QEAA@XZ ??1SYCLCategory@detail@_V1@sycl@@UEAA@XZ -??1SYCLMemObjT@detail@_V1@sycl@@UEAA@XZ ??1SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ ??1UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ ??1accelerator_selector@_V1@sycl@@UEAA@XZ -??1buffer_impl@detail@_V1@sycl@@UEAA@XZ ??1buffer_plain@detail@_V1@sycl@@QEAA@XZ ??1context@_V1@sycl@@QEAA@XZ ??1cpu_selector@_V1@sycl@@UEAA@XZ @@ -712,7 +671,6 @@ ??1gpu_selector@_V1@sycl@@UEAA@XZ ??1handler@_V1@sycl@@AEAA@XZ ??1host_selector@_V1@sycl@@UEAA@XZ -??1image_impl@detail@_V1@sycl@@UEAA@XZ ??1image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1image_mem_impl@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1image_plain@detail@_V1@sycl@@QEAA@XZ @@ -724,9 +682,7 @@ ??1platform@_V1@sycl@@QEAA@XZ ??1queue@_V1@sycl@@QEAA@XZ ??1sampler@_V1@sycl@@QEAA@XZ -??1sampler_impl@detail@_V1@sycl@@QEAA@XZ ??1stream@_V1@sycl@@QEAA@XZ -??1stream_impl@detail@_V1@sycl@@QEAA@XZ ??1tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ ??4?$OwnerLessBase@Vcontext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vcontext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z @@ -748,15 +704,10 @@ ??4?$hash@Vqueue@_V1@sycl@@@std@@QEAAAEAU01@AEBU01@@Z ??4AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z -??4AccessorImplHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4HostProfilingInfo@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4HostProfilingInfo@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4LocalAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4LocalAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z -??4LocalAccessorImplHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z -??4LocalAccessorImplHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z 
-??4MemoryManager@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z -??4MemoryManager@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4OSUtil@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4OSUtil@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z @@ -820,8 +771,6 @@ ??4sampler@_V1@sycl@@QEAAAEAV012@AEBV012@@Z ??4stream@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4stream@_V1@sycl@@QEAAAEAV012@AEBV012@@Z -??4stream_impl@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z -??4stream_impl@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4tls_code_loc_t@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??8context@_V1@sycl@@QEBA_NAEBV012@@Z ??8device@_V1@sycl@@QEBA_NAEBV012@@Z @@ -866,9 +815,7 @@ ??Zhalf@host_half_impl@detail@_V1@sycl@@QEAAAEAV01234@AEBV01234@@Z ??_0half@host_half_impl@detail@_V1@sycl@@QEAAAEAV01234@AEBV01234@@Z ??_7SYCLCategory@detail@_V1@sycl@@6B@ -??_7SYCLMemObjT@detail@_V1@sycl@@6B@ ??_7accelerator_selector@_V1@sycl@@6B@ -??_7buffer_impl@detail@_V1@sycl@@6B@ ??_7cpu_selector@_V1@sycl@@6B@ ??_7default_selector@_V1@sycl@@6B@ ??_7device_selector@_V1@sycl@@6B@ @@ -877,7 +824,6 @@ ??_7filter_selector@oneapi@ext@_V1@sycl@@6B@ ??_7gpu_selector@_V1@sycl@@6B@ ??_7host_selector@_V1@sycl@@6B@ -??_7image_impl@detail@_V1@sycl@@6B@ ??_8exception@_V1@sycl@@7B@ ??_Dexception@_V1@sycl@@QEAAXXZ ??_Fcontext@_V1@sycl@@QEAAXXZ @@ -889,7 +835,6 @@ ?GDBMethodsAnchor@SampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ ?GDBMethodsAnchor@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ ?GetRangeRoundingSettings@handler@_V1@sycl@@AEAAXAEA_K00@Z -?OffsetSize@stream_impl@detail@_V1@sycl@@0_KB ?PushBack@exception_list@_V1@sycl@@AEAAX$$QEAVexception_ptr@std@@@Z ?PushBack@exception_list@_V1@sycl@@AEAAXAEBVexception_ptr@std@@@Z ?RangeRoundingTrace@handler@_V1@sycl@@AEAA_NXZ @@ -3927,9 +3872,6 @@ ?__trunc_impl@_V1@sycl@@YAMM@Z ?__trunc_impl@_V1@sycl@@YANN@Z ?accelerator_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z 
-?accessGlobalBuf@stream_impl@detail@_V1@sycl@@QEAA?AV?$accessor@D$00$0EAC@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@34@AEAVhandler@34@@Z -?accessGlobalFlushBuf@stream_impl@detail@_V1@sycl@@QEAA?AV?$accessor@D$00$0EAC@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@34@AEAVhandler@34@@Z -?accessGlobalOffset@stream_impl@detail@_V1@sycl@@QEAA?AV?$accessor@I$00$0EAF@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@34@AEAVhandler@34@@Z ?add@device_global_map@detail@_V1@sycl@@YAXPEBXPEBD@Z ?add@host_pipe_map@detail@_V1@sycl@@YAXPEBXPEBD@Z ?add@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA?AVnode@34567@AEBVproperty_list@67@@Z @@ -3940,12 +3882,9 @@ ?addHostUnsampledImageAccessorAndWait@detail@_V1@sycl@@YAXPEAVUnsampledImageAccessorImplHost@123@@Z ?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z ?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z -?addInteropObject@buffer_impl@detail@_V1@sycl@@QEBAXAEAV?$vector@_KV?$allocator@_K@std@@@std@@@Z -?addOrReplaceAccessorProperties@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBVproperty_list@34@@Z ?addOrReplaceAccessorProperties@buffer_plain@detail@_V1@sycl@@IEAAXAEBVproperty_list@34@@Z ?addReduction@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@$$CBX@std@@@Z ?addStream@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vstream_impl@detail@_V1@sycl@@@std@@@Z -?advise_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KW4_pi_mem_advice@@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z 
?alignedAlloc@OSUtil@detail@_V1@sycl@@SAPEAX_K0@Z ?alignedFree@OSUtil@detail@_V1@sycl@@SAXPEAX@Z ?aligned_alloc@_V1@sycl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBUcode_location@detail@12@@Z @@ -3968,18 +3907,6 @@ ?alloc_image_mem@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?alloc_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?alloc_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z -?allocate@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@_NPEAXV?$vector@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@2@@6@AEAPEAU_pi_event@@@Z -?allocateBufferObject@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX_N_KAEBVproperty_list@34@@Z -?allocateHostMem@SYCLMemObjT@detail@_V1@sycl@@UEAAPEAXXZ -?allocateHostMemory@MemoryManager@detail@_V1@sycl@@SAPEAXPEAVSYCLMemObjI@234@PEAX_N_KAEBVproperty_list@34@@Z -?allocateImageObject@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX_NAEBU_pi_image_desc@@AEBU_pi_image_format@@AEBVproperty_list@34@@Z -?allocateInteropMemObject@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAXAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@AEBV56@AEBVproperty_list@34@AEAPEAU_pi_event@@@Z -?allocateMem@SYCLMemObjT@detail@_V1@sycl@@UEAAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_NPEAXAEAPEAU_pi_event@@@Z -?allocateMem@buffer_impl@detail@_V1@sycl@@UEAAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_NPEAXAEAPEAU_pi_event@@@Z -?allocateMem@image_impl@detail@_V1@sycl@@UEAAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_NPEAXAEAPEAU_pi_event@@@Z 
-?allocateMemBuffer@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@PEAX_N_KAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@AEBV56@AEBVproperty_list@34@AEAPEAU_pi_event@@@Z -?allocateMemImage@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@PEAX_N_KAEBU_pi_image_desc@@AEBU_pi_image_format@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@AEBV56@AEBVproperty_list@34@AEAPEAU_pi_event@@@Z -?allocateMemSubBuffer@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX_K2V?$range@$02@34@V?$vector@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@2@@6@AEAPEAU_pi_event@@@Z ?aspect_selector@_V1@sycl@@YA?AV?$function@$$A6AHAEBVdevice@_V1@sycl@@@Z@std@@AEBV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@4@0@Z ?assertion@pi@detail@_V1@sycl@@YAX_NPEBD@Z ?associateWithHandler@detail@_V1@sycl@@YAXAEAVhandler@23@PEAVAccessorBaseHost@123@W4target@access@23@@Z @@ -3995,31 +3922,16 @@ ?begin_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@AEBVproperty_list@67@@Z ?build_from_source@detail@experimental@oneapi@ext@_V1@sycl@@YA?AV?$kernel_bundle@$01@56@AEAV?$kernel_bundle@$02@56@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@PEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?build_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z -?canReadHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NPEAX_K@Z 
-?canReuseHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NPEAX_K@Z ?cancel_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ ?category@exception@_V1@sycl@@QEBAAEBVerror_category@std@@XZ -?checkImageDesc@image_impl@detail@_V1@sycl@@AEAA_NAEBU_pi_image_desc@@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX@Z -?checkImageFormat@image_impl@detail@_V1@sycl@@AEAA_NAEBU_pi_image_format@@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@@Z ?code@exception@_V1@sycl@@QEBAAEBVerror_code@std@@XZ ?compile_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z ?complete_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAA?AVevent@56@AEBVproperty_list@56@@Z ?computeFallbackKernelBounds@handler@_V1@sycl@@AEAA?AV?$id@$01@23@_K0@Z -?constructorNotification@buffer_impl@detail@_V1@sycl@@QEAAXAEBUcode_location@234@PEAXPEBX2IIQEA_K@Z ?constructorNotification@buffer_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBX2IIQEA_K@Z ?constructorNotification@detail@_V1@sycl@@YAXPEAX0W4target@access@23@W4mode@523@AEBUcode_location@123@@Z ?contains_specialization_constants@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ ?contextSetExtendedDeleter@pi@detail@_V1@sycl@@YAXAEBVcontext@34@P6AXPEAX@Z1@Z -?convertChannelOrder@detail@_V1@sycl@@YA?AW4_pi_image_channel_order@@W4image_channel_order@23@@Z -?convertChannelOrder@detail@_V1@sycl@@YA?AW4image_channel_order@23@W4_pi_image_channel_order@@@Z -?convertChannelType@detail@_V1@sycl@@YA?AW4_pi_image_channel_type@@W4image_channel_type@23@@Z -?convertChannelType@detail@_V1@sycl@@YA?AW4image_channel_type@23@W4_pi_image_channel_type@@@Z 
-?copy@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@IV?$range@$02@34@3V?$id@$02@34@I12I334IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z -?copy_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEBX_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@PEAX111V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?copy_from_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K3PEAXAEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?copy_image_bindless@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@0AEBU_pi_image_desc@@AEBU_pi_image_format@@W4_pi_image_copy_flags@@Upi_image_offset_struct@@5Upi_image_region_struct@@6AEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?copy_to_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K30AEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?copy_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEAXV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z 
@@ -4035,7 +3947,6 @@ ?create_kernel_bundle_from_source@experimental@oneapi@ext@_V1@sycl@@YA?AV?$kernel_bundle@$02@45@AEBVcontext@45@W4source_language@12345@AEBV?$vector@W4byte@std@@V?$allocator@W4byte@std@@@2@@std@@@Z ?default_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z ?deleteAccProps@buffer_plain@detail@_V1@sycl@@IEAAXAEBW4PropWithDataKind@234@@Z -?deleteAccessorProperty@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBW4PropWithDataKind@234@@Z ?depends_on@handler@_V1@sycl@@IEAAXAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z ?depends_on@handler@_V1@sycl@@IEAAXAEBV?$vector@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@2@@std@@@Z ?depends_on@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z @@ -4046,9 +3957,6 @@ ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUsampled_image_handle@12345@AEBVqueue@45@@Z ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUunsampled_image_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUunsampled_image_handle@12345@AEBVqueue@45@@Z -?destructorNotification@buffer_impl@detail@_V1@sycl@@QEAAXPEAX@Z -?detachMemoryObject@SYCLMemObjT@detail@_V1@sycl@@QEBAXAEBV?$shared_ptr@VSYCLMemObjT@detail@_V1@sycl@@@std@@@Z -?determineHostPtr@SYCLMemObjT@detail@_V1@sycl@@IEAAXAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_NAEAPEAXAEA_N@Z ?device_has@queue@_V1@sycl@@AEBA_NW4aspect@23@@Z ?die@pi@detail@_V1@sycl@@YAXPEBD@Z ?discard_or_return@queue@_V1@sycl@@AEAA?AVevent@23@AEBV423@@Z @@ -4065,7 +3973,6 @@ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z 
-?ext_oneapi_advise_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEBX_KW4_pi_mem_advice@@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z @@ -4101,16 +4008,10 @@ ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111V423@AEBUcode_location@detail@23@@Z -?ext_oneapi_copyD2D_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAXIV?$range@$02@34@4V?$id@$02@34@I3I445IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_copyD2H_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAXIV?$range@$02@34@4V?$id@$02@34@IPEADI45IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_copyH2D_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEADIV?$range@$02@34@V?$id@$02@34@IPEAXI445IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z 
-?ext_oneapi_copy_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEBXPEAU_pi_ext_command_buffer@@_KPEAXV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_disable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_empty@queue@_V1@sycl@@QEBA_NXZ ?ext_oneapi_enable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z -?ext_oneapi_fill_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAX_KPEBDIV?$range@$02@34@6V?$id@$02@34@IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_composite_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ @@ -4140,7 +4041,6 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVqueue@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vstream@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVstream@34@@Z -?ext_oneapi_prefetch_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_prod@queue@_V1@sycl@@QEAAXXZ ?ext_oneapi_set_external_event@queue@_V1@sycl@@QEAAXAEBVevent@23@@Z ?ext_oneapi_signal_external_semaphore@handler@_V1@sycl@@QEAAXUinterop_semaphore_handle@experimental@oneapi@ext@23@@Z @@ -4166,14 +4066,10 @@ 
?ext_oneapi_wait_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uinterop_semaphore_handle@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z ?extractArgsAndReqs@handler@_V1@sycl@@AEAAXXZ ?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEAD_KPEBUkernel_param_desc_t@detail@23@_N@Z -?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z -?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ ?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z ?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ ?find_device_intersection@detail@_V1@sycl@@YA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@@Z -?flush@stream_impl@detail@_V1@sycl@@QEAAXAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z ?free@_V1@sycl@@YAXPEAXAEBVcontext@12@AEBUcode_location@detail@12@@Z ?free@_V1@sycl@@YAXPEAXAEBVqueue@12@AEBUcode_location@detail@12@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4196,34 +4092,23 @@ 
?getAccessRange@AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV?$range@$02@34@XZ ?getAccessRange@AccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$range@$02@34@XZ ?getBorderColor@detail@_V1@sycl@@YA?AV?$vec@M$03@23@W4image_channel_order@23@@Z -?getBufSizeForContext@SYCLMemObjT@detail@_V1@sycl@@SA_KAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_K@Z ?getChannelOrder@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_order@34@XZ ?getChannelOrder@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_order@34@XZ -?getChannelOrder@image_impl@detail@_V1@sycl@@QEBA?AW4image_channel_order@34@XZ ?getChannelOrder@image_plain@detail@_V1@sycl@@IEBA?AW4image_channel_order@34@XZ ?getChannelType@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_type@34@XZ ?getChannelType@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_type@34@XZ -?getChannelType@image_impl@detail@_V1@sycl@@QEBA?AW4image_channel_type@34@XZ ?getChannelType@image_plain@detail@_V1@sycl@@IEBA?AW4image_channel_type@34@XZ ?getCommandGraph@handler@_V1@sycl@@AEBA?AV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@XZ ?getContextImplPtr@handler@_V1@sycl@@AEBAAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@XZ ?getCurrentDSODir@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ?getDeviceFromHandler@detail@_V1@sycl@@YA?AVdevice@23@AEAVhandler@23@@Z -?getDevices@image_impl@detail@_V1@sycl@@AEAA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@6@@Z ?getDirName@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBD@Z ?getElemSize@AccessorBaseHost@detail@_V1@sycl@@QEBAIXZ ?getElementSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAHXZ ?getElementSize@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAHXZ ?getElementSize@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAHXZ 
-?getElementSize@image_impl@detail@_V1@sycl@@QEBA_KXZ ?getElementSize@image_plain@detail@_V1@sycl@@IEBA_KXZ ?getEndTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ -?getImageDesc@image_impl@detail@_V1@sycl@@AEAA?AU_pi_image_desc@@_N@Z -?getImageElementSize@detail@_V1@sycl@@YAEEW4image_channel_type@23@@Z -?getImageFormat@image_impl@detail@_V1@sycl@@AEAA?AU_pi_image_format@@XZ -?getImageNumberChannels@detail@_V1@sycl@@YAEW4image_channel_order@23@@Z -?getImageType@image_impl@detail@_V1@sycl@@AEAA?AW4_pi_mem_type@@XZ -?getInteropContext@SYCLMemObjT@detail@_V1@sycl@@UEBA?AV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@XZ ?getKernelName@handler@_V1@sycl@@AEAA?AVstring@detail@23@XZ ?getMaxWorkGroups@handler@_V1@sycl@@AEAA?AV?$optional@V?$array@_K$02@std@@@std@@XZ ?getMaxWorkGroups_v2@handler@_V1@sycl@@AEAA?AV?$tuple@V?$array@_K$02@std@@_N@std@@XZ @@ -4244,7 +4129,6 @@ ?getNativeImpl@kernel@_V1@sycl@@AEBA_KXZ ?getNativeMem@interop_handle@_V1@sycl@@AEBA_KPEAVAccessorImplHost@detail@23@@Z ?getNativeQueue@interop_handle@_V1@sycl@@AEBA_KAEAH@Z -?getNativeVector@buffer_impl@detail@_V1@sycl@@QEBA?AV?$vector@_KV?$allocator@_K@std@@@std@@W4backend@34@@Z ?getNativeVector@buffer_plain@detail@_V1@sycl@@IEBA?AV?$vector@_KV?$allocator@_K@std@@@std@@W4backend@34@@Z ?getNativeVector@event@_V1@sycl@@AEBA?AV?$vector@_KV?$allocator@_K@std@@@std@@XZ ?getNumOfDims@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAHXZ @@ -4253,14 +4137,12 @@ ?getOSMemSize@OSUtil@detail@_V1@sycl@@SA_KXZ ?getOffset@AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV?$id@$02@34@XZ ?getOffset@AccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$id@$02@34@XZ -?getOrCreateSampler@sampler_impl@detail@_V1@sycl@@QEAAPEAU_pi_sampler@@AEBVcontext@34@@Z ?getOrInsertHandlerKernelBundle@handler@_V1@sycl@@AEBA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@_N@Z 
?getOrWaitEvents@detail@_V1@sycl@@YA?AV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@std@@V?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@5@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@5@@Z ?getPitch@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AV?$id@$02@34@XZ ?getPitch@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AV?$id@$02@34@XZ ?getPixelCoordLinearFiltMode@detail@_V1@sycl@@YA?AV?$vec@H$07@23@V?$vec@M$03@23@W4addressing_mode@23@V?$range@$02@23@AEAV523@@Z ?getPixelCoordNearestFiltMode@detail@_V1@sycl@@YA?AV?$vec@H$03@23@V?$vec@M$03@23@W4addressing_mode@23@V?$range@$02@23@@Z -?getPlugin@SYCLMemObjT@detail@_V1@sycl@@QEBAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ ?getPropList@AccessorBaseHost@detail@_V1@sycl@@QEBAAEBVproperty_list@34@XZ ?getPropList@LocalAccessorBaseHost@detail@_V1@sycl@@QEBAAEBVproperty_list@34@XZ ?getPropList@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAAEBVproperty_list@34@XZ @@ -4273,29 +4155,19 @@ ?getPtr@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAPEAXXZ ?getPtr@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAPEAXXZ ?getPtr@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAPEAXXZ -?getRowPitch@image_impl@detail@_V1@sycl@@QEBA_KXZ ?getRowPitch@image_plain@detail@_V1@sycl@@IEBA_KXZ ?getSampler@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AUimage_sampler@34@XZ -?getSampler@image_impl@detail@_V1@sycl@@QEBA?AUimage_sampler@34@XZ ?getSampler@image_plain@detail@_V1@sycl@@IEBA?AUimage_sampler@34@XZ ?getSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV?$range@$02@34@XZ ?getSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$range@$02@34@XZ ?getSize@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$range@$02@34@XZ ?getSize@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$range@$02@34@XZ ?getSize@buffer_plain@detail@_V1@sycl@@IEBA_KXZ -?getSizeInBytes@SYCLMemObjT@detail@_V1@sycl@@UEBA_KXZ -?getSlicePitch@image_impl@detail@_V1@sycl@@QEBA_KXZ 
?getSlicePitch@image_plain@detail@_V1@sycl@@IEBA_KXZ ?getStartTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ -?getType@SYCLMemObjT@detail@_V1@sycl@@UEBA?AW4MemObjType@SYCLMemObjI@234@XZ -?getType@buffer_impl@detail@_V1@sycl@@UEBA?AW4MemObjType@SYCLMemObjI@234@XZ ?getType@handler@_V1@sycl@@AEAA?AW4CGTYPE@CG@detail@23@XZ -?getType@image_impl@detail@_V1@sycl@@UEBA?AW4MemObjType@SYCLMemObjI@234@XZ -?getUserPtr@SYCLMemObjT@detail@_V1@sycl@@QEBAPEAXXZ ?getValueFromDynamicParameter@detail@_V1@sycl@@YAPEAXAEAVdynamic_parameter_base@1experimental@oneapi@ext@23@@Z ?get_addressing_mode@sampler@_V1@sycl@@QEBA?AW4addressing_mode@23@XZ -?get_addressing_mode@sampler_impl@detail@_V1@sycl@@QEBA?AW4addressing_mode@34@XZ -?get_allocator_internal@SYCLMemObjT@detail@_V1@sycl@@QEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_allocator_internal@buffer_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_allocator_internal@image_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_backend@context@_V1@sycl@@QEBA?AW4backend@23@XZ @@ -4316,9 +4188,6 @@ ?get_context@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AVcontext@34@XZ ?get_context@queue@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_coordinate_normalization_mode@sampler@_V1@sycl@@QEBA?AW4coordinate_normalization_mode@23@XZ -?get_coordinate_normalization_mode@sampler_impl@detail@_V1@sycl@@QEBA?AW4coordinate_normalization_mode@34@XZ -?get_count@SYCLMemObjT@detail@_V1@sycl@@QEBA_KXZ -?get_count@image_impl@detail@_V1@sycl@@QEBA_KXZ ?get_count@image_plain@detail@_V1@sycl@@IEBA_KXZ ?get_descriptor@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAAEBUimage_descriptor@23456@XZ ?get_device@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ @@ -4329,7 
+4198,6 @@ ?get_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4device_type@info@23@@Z ?get_empty_interop_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@@Z ?get_filtering_mode@sampler@_V1@sycl@@QEBA?AW4filtering_mode@23@XZ -?get_filtering_mode@sampler_impl@detail@_V1@sycl@@QEBA?AW4filtering_mode@34@XZ ?get_flags@stream@_V1@sycl@@AEBAIXZ ?get_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@XZ ?get_image_channel_type@experimental@oneapi@ext@_V1@sycl@@YA?AW4image_channel_type@45@Uimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4355,7 +4223,6 @@ ?get_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ?get_num_channels@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAIXZ ?get_pipe_name@pipe_base@experimental@intel@ext@_V1@sycl@@KA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBX@Z -?get_pitch@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$01@34@XZ ?get_pitch@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$01@34@XZ ?get_platform@context@_V1@sycl@@QEBA?AVplatform@23@XZ ?get_platform@device@_V1@sycl@@QEBA?AVplatform@23@XZ @@ -4365,7 +4232,6 @@ ?get_precision@stream@_V1@sycl@@QEBA_KXZ ?get_predecessors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ?get_queue@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEBA?AVqueue@56@XZ -?get_range@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$02@34@XZ ?get_range@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$range@$02@56@XZ ?get_range@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$02@34@XZ 
?get_root_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ @@ -4379,17 +4245,10 @@ ?get_wait_list@event@_V1@sycl@@QEAA?AV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@XZ ?get_width@stream@_V1@sycl@@QEBA_KXZ ?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ -?get_work_item_buffer_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?gpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z -?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXPEAX@Z@std@@_K_N@Z -?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$shared_ptr@X@std@@_K_N@Z -?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXPEAX_K@Z -?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXPEBX_K@Z ?handleRelease@buffer_plain@detail@_V1@sycl@@IEBAXXZ -?handleWriteAccessorCreation@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?has@device@_V1@sycl@@QEBA_NW4aspect@23@@Z ?has@platform@_V1@sycl@@QEBA_NW4aspect@23@@Z -?hasUserDataPtr@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ ?has_context@exception@_V1@sycl@@QEBA_NXZ ?has_extension@device@_V1@sycl@@QEBA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?has_extension@opencl@_V1@sycl@@YA_NAEBVdevice@23@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z @@ -4402,20 +4261,16 @@ ?has_kernel_bundle_impl@detail@_V1@sycl@@YA_NAEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@6@W4bundle_state@23@@Z ?has_kernel_bundle_impl@detail@_V1@sycl@@YA_NAEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4bundle_state@23@@Z ?has_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBA_NPEBD@Z -?initStreamHost@stream_impl@detail@_V1@sycl@@QEAAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@@Z 
?internalProfilingTagImpl@handler@_V1@sycl@@AEAAXXZ ?isBackendSupportedFillSize@handler@_V1@sycl@@CA_N_K@Z ?isConstOrGlobal@handler@_V1@sycl@@CA_NW4target@access@23@@Z ?isDeviceGlobalUsedInKernel@detail@_V1@sycl@@YA_NPEBX@Z -?isHostPointerReadOnly@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ ?isImageOrImageArray@handler@_V1@sycl@@CA_NW4target@access@23@@Z -?isInterop@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ ?isMemoryObjectUsedByGraph@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ ?isOutOfRange@detail@_V1@sycl@@YA_NV?$vec@H$03@23@W4addressing_mode@23@V?$range@$02@23@@Z ?isPathPresent@OSUtil@detail@_V1@sycl@@SA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?isPlaceholder@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ ?isStateExplicitKernelBundle@handler@_V1@sycl@@AEBA_NXZ -?isUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEBA_NXZ ?isValidModeForDestinationAccessor@handler@_V1@sycl@@CA_NW4mode@access@23@@Z ?isValidModeForSourceAccessor@handler@_V1@sycl@@CA_NW4mode@access@23@@Z ?isValidTargetForExplicitOp@handler@_V1@sycl@@CA_NW4target@access@23@@Z @@ -4469,15 +4324,11 @@ ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z -?map@MemoryManager@detail@_V1@sycl@@SAPEAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@W4mode@access@34@IV?$range@$02@34@4V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z 
?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z -?markAsInternal@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ -?markBeingUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?markBufferAsInternal@detail@_V1@sycl@@YAXAEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z -?markNoLongerBeingUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?mem_advise@handler@_V1@sycl@@QEAAXPEBX_KH@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z @@ -4497,14 +4348,12 @@ ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBUcode_location@detail@23@@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KV423@AEBUcode_location@detail@23@@Z -?memset_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22DV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?message@SYCLCategory@detail@_V1@sycl@@UEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@Z ?modf_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAV45123@@Z ?modf_impl@detail@_V1@sycl@@YAMMPEAM@Z ?modf_impl@detail@_V1@sycl@@YANNPEAN@Z ?name@SYCLCategory@detail@_V1@sycl@@UEBAPEBDXZ ?native_specialization_constant@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ -?needsWriteBack@SYCLMemObjT@detail@_V1@sycl@@QEBA_NXZ ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$00@23@Vkernel@23@@Z 
?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$01@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$02@23@Vkernel@23@@Z @@ -4516,7 +4365,6 @@ ?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KAEBUcode_location@detail@23@@Z ?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KV423@AEBUcode_location@detail@23@@Z -?prefetch_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?prepare_for_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z ?prepare_for_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVqueue@45@@Z ?print_graph@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBAXV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_N@Z @@ -4527,10 +4375,6 @@ ?reduGetMaxWGSize@detail@_V1@sycl@@YA_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K@Z ?reduGetPreferredWGSize@detail@_V1@sycl@@YA_KAEAV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K@Z ?registerDynamicParameter@handler@_V1@sycl@@AEAAXAEAVdynamic_parameter_base@detail@experimental@oneapi@ext@23@H@Z -?release@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@PEAXV?$vector@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@2@@6@AEAPEAU_pi_event@@@Z -?releaseHostMem@SYCLMemObjT@detail@_V1@sycl@@UEAAXPEAX@Z -?releaseMem@SYCLMemObjT@detail@_V1@sycl@@UEAAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX@Z -?releaseMemObj@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@PEAX2@Z 
?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUinterop_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUinterop_mem_handle@12345@AEBVqueue@45@@Z ?release_from_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBXAEBVcontext@45@@Z @@ -4541,12 +4385,8 @@ ?remquo_impl@detail@_V1@sycl@@YANNNPEAH@Z ?reset@filter_selector@ONEAPI@_V1@sycl@@QEBAXXZ ?reset@filter_selector@oneapi@ext@_V1@sycl@@QEBAXXZ -?resize@AccessorImplHost@detail@_V1@sycl@@QEAAX_K@Z -?resize@buffer_impl@detail@_V1@sycl@@QEAAX_K@Z ?sampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@PEBXIAEBUcode_location@123@@Z -?sampledImageConstructorNotification@image_impl@detail@_V1@sycl@@QEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@AEBUimage_sampler@34@@Z ?sampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@AEBUimage_sampler@34@@Z -?sampledImageDestructorNotification@image_impl@detail@_V1@sycl@@QEAAXPEAX@Z ?sampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z ?saveCodeLoc@handler@_V1@sycl@@AEAAXUcode_location@detail@23@@Z ?select_device@detail@_V1@sycl@@YA?AVdevice@23@AEBV?$function@$$A6AHAEBVdevice@_V1@sycl@@@Z@std@@@Z @@ -4554,7 +4394,6 @@ ?select_device@device_selector@_V1@sycl@@UEBA?AVdevice@23@XZ ?select_device@filter_selector@ONEAPI@_V1@sycl@@UEBA?AVdevice@34@XZ ?select_device@filter_selector@oneapi@ext@_V1@sycl@@UEBA?AVdevice@45@XZ -?setAlign@SYCLMemObjT@detail@_V1@sycl@@QEAAX_K@Z ?setArgHelper@handler@_V1@sycl@@AEAAXH$$QEAVsampler@23@@Z ?setArgsHelper@handler@_V1@sycl@@AEAAXH@Z ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@@Z @@ -4563,15 +4402,10 @@ ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z 
?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z -?setPitches@image_impl@detail@_V1@sycl@@AEAAXAEBV?$range@$01@34@@Z -?setPitches@image_impl@detail@_V1@sycl@@AEAAXXZ ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ ?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z ?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z -?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAX$$T@Z -?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z -?set_final_data_from_storage@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXXZ ?set_final_data_internal@image_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z @@ -4580,18 +4414,14 @@ ?set_flag@stream@_V1@sycl@@AEBAXII@Z ?set_manipulator@stream@_V1@sycl@@AEBAXW4stream_manipulator@23@@Z ?set_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEAAXPEBDPEAX_K@Z -?set_write_back@SYCLMemObjT@detail@_V1@sycl@@QEAAX_N@Z ?set_write_back@buffer_plain@detail@_V1@sycl@@IEAAX_N@Z ?set_write_back@image_plain@detail@_V1@sycl@@IEAAX_N@Z ?sincos_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAV45123@@Z ?sincos_impl@detail@_V1@sycl@@YAMMPEAM@Z ?sincos_impl@detail@_V1@sycl@@YANNPEAN@Z ?single_task@handler@_V1@sycl@@QEAAXVkernel@23@@Z -?size@SYCLMemObjT@detail@_V1@sycl@@QEBA_KXZ ?size@exception_list@_V1@sycl@@QEBA_KXZ -?size@image_impl@detail@_V1@sycl@@QEBA_KXZ ?size@stream@_V1@sycl@@QEBA_KXZ -?size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ ?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ ?stringifyErrorCode@detail@_V1@sycl@@YAPEBDH@Z @@ -4605,23 +4435,16 @@ 
?sycl_category@_V1@sycl@@YAAEBVerror_category@std@@XZ ?throwIfActionIsCreated@handler@_V1@sycl@@AEAAXXZ ?throw_asynchronous@queue@_V1@sycl@@QEAAXXZ -?unmap@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@1V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?unsampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@W4mode@access@23@PEBXIAEBUcode_location@123@@Z -?unsampledImageConstructorNotification@image_impl@detail@_V1@sycl@@QEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z ?unsampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z -?unsampledImageDestructorNotification@image_impl@detail@_V1@sycl@@QEAAXPEAX@Z ?unsampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z ?unset_flag@stream@_V1@sycl@@AEBAXI@Z ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$command_graph@$0A@@34567@@Z ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBVnode@34567@@Z ?updateAccessor@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBVAccessorBaseHost@267@@Z -?updateHostMemory@SYCLMemObjT@detail@_V1@sycl@@IEAAXQEAX@Z -?updateHostMemory@SYCLMemObjT@detail@_V1@sycl@@IEAAXXZ ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBX_K@Z -?useHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NXZ ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z -?usesPinnedHostMemory@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ 
?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyKernelInvoc@handler@_V1@sycl@@AEAAXAEBVkernel@23@@Z ?verifyUsedKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z From bdd943756ab1f4195fc4ac56e6c6ba2fe364b868 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 1 Jul 2024 08:57:38 +0100 Subject: [PATCH 27/40] [UR] Bump main tag to aaf0810 (#14346) https://github.com/oneapi-src/unified-runtime/pull/1804 --------- Co-authored-by: Callum Fare --- sycl/plugins/unified_runtime/CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 0cbbcf4a574be..252deba45219a 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -100,11 +100,12 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 58ca3a34dea0f559b8d950bdfe7d5d8a610d3a94 - # Author: Yang Zhao - # Date: Thu Jun 27 20:26:17 2024 +0800 - # [DeviceSanitizer] Support out-of-bounds on private memory (#1676) - set(UNIFIED_RUNTIME_TAG 58ca3a34dea0f559b8d950bdfe7d5d8a610d3a94) + # commit aaf08109f2a05adb61f50478824ae2739526daee + # Author: Ben Tracy + # Date: Mon Jun 17 14:49:53 2024 +0100 + # [CMDBUF] Fix coverity issue in command buffers + # - Fix incorrect conditions for copy engine usage that were reported on coverity. + set(UNIFIED_RUNTIME_TAG aaf08109f2a05adb61f50478824ae2739526daee) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO} From 2ac6184d2edb64c549668561aaa36f9accdafe51 Mon Sep 17 00:00:00 2001 From: smanna12 Date: Mon, 1 Jul 2024 00:59:18 -0700 Subject: [PATCH 28/40] [SYCL][NFC] Fix Static Analyzer bugs (#14349) This patch fixes two possible null pointer dereferences issues: 1. 
In CodeGenModule::Release(): Dereferencing a pointer that might be nullptr RD->getAttr() when calling getAspectsMD(). 2. In Sema::ActOnFinishFunctionBody(): Passing null pointer FD to hasAttr, which dereferences it. Signed-off-by: Soumi Manna --------- Signed-off-by: Soumi Manna --- clang/lib/CodeGen/CodeGenModule.cpp | 6 +++--- clang/lib/Sema/SemaDecl.cpp | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index ba02161f9c829..e9c202e4d39d2 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1380,9 +1380,9 @@ void CodeGenModule::Release() { for (const auto &Type : TypesWithAspects) { StringRef Name = Type.first; const RecordDecl *RD = Type.second; - AspectsMD->addOperand(getAspectsMD(Context, TheModule.getContext(), - Name, - RD->getAttr())); + if (const auto *Attr = RD->getAttr()) + AspectsMD->addOperand( + getAspectsMD(Context, TheModule.getContext(), Name, Attr)); } } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index a9fa63a5e3fa9..fbafc428be6cb 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -16542,16 +16542,19 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, DiscardCleanupsInEvaluationContext(); } - if (FD && ((LangOpts.OpenMP && (LangOpts.OpenMPIsTargetDevice || - !LangOpts.OMPTargetTriples.empty())) || - LangOpts.CUDA || LangOpts.SYCLIsDevice)) { + if (!FD) + return dcl; + + if ((LangOpts.OpenMP && + (LangOpts.OpenMPIsTargetDevice || !LangOpts.OMPTargetTriples.empty())) || + LangOpts.CUDA || LangOpts.SYCLIsDevice) { auto ES = getEmissionStatus(FD); if (ES == Sema::FunctionEmissionStatus::Emitted || ES == Sema::FunctionEmissionStatus::Unknown) DeclsToCheckForDeferredDiags.insert(FD); } - if (FD && !FD->isDeleted()) + if (!FD->isDeleted()) checkTypeSupport(FD->getType(), FD->getLocation(), FD); // Handle free functions. 
From 1f3f02bd0bc75af9c7d47dbf05bde4ef26279d6b Mon Sep 17 00:00:00 2001 From: Wenju He Date: Mon, 1 Jul 2024 00:59:44 -0700 Subject: [PATCH 29/40] [DeviceSanitizer] Don't instrument referenced-indirectly functions (#14298) When we create SLM __AsanLaunchInfo and store newly added kernel arg __asan_launch into the SLM, the SLM is loaded in asan report function. If instructions in referenced-indirectly function are instrumented, the report function is called. However, access to SLM in referenced- indirectly function isn't supported in intel-graphics-compiler yet. --- .../Instrumentation/AddressSanitizer.cpp | 12 ++++++++-- .../SPIRV/skip_referenced_indirectly.ll | 23 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Instrumentation/AddressSanitizer/SPIRV/skip_referenced_indirectly.ll diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 9e893938b3432..d6222972bedc5 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -3360,8 +3360,16 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false; if (F.getName().starts_with("__asan_")) return false; - if (F.getName().contains("__sycl_service_kernel__")) - return false; + + if (TargetTriple.isSPIR()) { + if (F.getName().contains("__sycl_service_kernel__")) + return false; + // Skip referenced-indirectly function as we insert access to shared local + // memory (SLM) __AsanLaunchInfo and access to SLM in referenced-indirectly + // function isn't supported yet in intel-graphics-compiler. 
+ if (F.hasFnAttribute("referenced-indirectly")) + return false; + } bool FunctionModified = false; diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/skip_referenced_indirectly.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/skip_referenced_indirectly.ll new file mode 100644 index 0000000000000..c2203ae7f242b --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/skip_referenced_indirectly.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 | FileCheck %s + +; Check referenced-indirectly function isn't instrumented. + +target triple = "spir64-unknown-unknown" + +%structtype = type { [3 x ptr addrspace(4)] } +%class.Base = type <{ ptr addrspace(4), i32, [4 x i8] }> +@_ZTV8Derived1 = linkonce_odr addrspace(1) constant %structtype { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @_ZN8Derived17displayEv to ptr addrspace(4))] }, align 8, !spirv.Decorations !0 + +define linkonce_odr spir_func i32 @_ZN8Derived17displayEv(ptr addrspace(4) align 8 %this) sanitize_address "referenced-indirectly" { +entry: +; CHECK-NOT: call void @__asan_load + + %base_data = getelementptr inbounds %class.Base, ptr addrspace(4) %this, i64 0, i32 1 + %1 = load i32, ptr addrspace(4) %base_data, align 8 + ret i32 %1 +} + +!0 = !{!1, !2, !3} +!1 = !{i32 22} +!2 = !{i32 41, !"_ZTV8Derived1", i32 2} +!3 = !{i32 44, i32 8} From fd004340edb17184bb1e79c3b044a8ae710e16a1 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 1 Jul 2024 13:59:39 +0200 Subject: [PATCH 30/40] [SYCL] Discard events created with enqueue function submission (#14224) This patch makes the [enqueue free functions](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_enqueue_functions.asciidoc) create discarded events when they do not return events. 
This allows calls to the backend to pass `nullptr` when enqueuing the functions, avoiding the creation of native events. This only happens under certain scenarios and currently only applies to in-order queues. However, in-order queues rely on knowing the last event when enqueuing `host_task` commands. To address this case, the `host_task` will insert a barrier when the last event was discarded and use that event for synchronization instead. This case also applies to discarded events resulting from the use of the [sycl_ext_oneapi_discard_queue_events](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_discard_queue_events.asciidoc) extension. --------- Signed-off-by: Larsen, Steffen --- .../oneapi/experimental/enqueue_functions.hpp | 44 +- sycl/include/sycl/handler.hpp | 39 ++ sycl/include/sycl/queue.hpp | 33 ++ sycl/include/sycl/reduction.hpp | 5 +- sycl/source/CMakeLists.txt | 1 + sycl/source/detail/graph_impl.cpp | 9 +- sycl/source/detail/handler_impl.hpp | 10 +- sycl/source/detail/queue_impl.cpp | 47 +- sycl/source/detail/queue_impl.hpp | 97 ++-- sycl/source/detail/scheduler/commands.cpp | 25 +- sycl/source/detail/scheduler/commands.hpp | 6 + .../source/detail/scheduler/graph_builder.cpp | 14 +- sycl/source/detail/scheduler/scheduler.cpp | 13 +- sycl/source/detail/scheduler/scheduler.hpp | 4 +- sycl/source/enqueue_functions.cpp | 42 ++ sycl/source/handler.cpp | 30 +- sycl/source/queue.cpp | 54 +- sycl/test/abi/sycl_symbols_linux.dump | 9 + sycl/test/abi/sycl_symbols_windows.dump | 7 + sycl/unittests/Extensions/CMakeLists.txt | 2 + sycl/unittests/Extensions/DiscardEvent.cpp | 83 +++ .../Extensions/EnqueueFunctionsEvents.cpp | 474 ++++++++++++++++++ sycl/unittests/helpers/PiMockPlugin.hpp | 75 ++- .../arg_mask/EliminatedArgMask.cpp | 2 +- .../scheduler/AccessorDefaultCtor.cpp | 7 +- sycl/unittests/scheduler/Commands.cpp | 3 +- .../scheduler/EnqueueWithDependsOnDeps.cpp | 5 +- sycl/unittests/scheduler/GraphCleanup.cpp | 15 +-
.../scheduler/InOrderQueueSyncCheck.cpp | 34 +- sycl/unittests/scheduler/KernelFusion.cpp | 6 +- sycl/unittests/scheduler/QueueFlushing.cpp | 3 +- .../scheduler/SchedulerTestUtils.hpp | 20 +- .../scheduler/StreamInitDependencyOnHost.cpp | 10 +- 33 files changed, 1040 insertions(+), 188 deletions(-) create mode 100644 sycl/source/enqueue_functions.cpp create mode 100644 sycl/unittests/Extensions/DiscardEvent.cpp create mode 100644 sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 3201cf94f4065..7f10dd7f79e85 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -8,7 +8,7 @@ #pragma once -#include // for std::forward +#include #include #include @@ -72,14 +72,20 @@ template struct LaunchConfigAccess { return MLaunchConfig.getProperties(); } }; + +template +void submit_impl(queue &Q, CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc) { + Q.submit_without_event(std::forward(CGF), CodeLoc); +} } // namespace detail template void submit(queue Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc = sycl::detail::code_location::current()) { - // TODO: Use new submit without Events. 
- Q.submit(std::forward(CGF), CodeLoc); + sycl::ext::oneapi::experimental::detail::submit_impl( + Q, std::forward(CGF), CodeLoc); } template @@ -205,7 +211,8 @@ template Range, const KernelType &KernelObj, ReductionsT &&...Reductions) { submit(Q, [&](handler &CGH) { - nd_launch(CGH, Range, KernelObj, std::forward(Reductions)...); + nd_launch(CGH, Range, KernelObj, + std::forward(Reductions)...); }); } @@ -228,7 +235,8 @@ template , Properties> Config, const KernelType &KernelObj, ReductionsT &&...Reductions) { submit(Q, [&](handler &CGH) { - nd_launch(CGH, Config, KernelObj, std::forward(Reductions)...); + nd_launch(CGH, Config, KernelObj, + std::forward(Reductions)...); }); } @@ -270,11 +278,9 @@ inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) { CGH.memcpy(Dest, Src, NumBytes); } -inline void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, - const sycl::detail::code_location &CodeLoc = - sycl::detail::code_location::current()) { - submit(Q, [&](handler &CGH) { memcpy(CGH, Dest, Src, NumBytes); }, CodeLoc); -} +__SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()); template void copy(handler &CGH, const T *Src, T *Dest, size_t Count) { @@ -292,11 +298,9 @@ inline void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes) { CGH.memset(Ptr, Value, NumBytes); } -inline void memset(queue Q, void *Ptr, int Value, size_t NumBytes, - const sycl::detail::code_location &CodeLoc = - sycl::detail::code_location::current()) { - submit(Q, [&](handler &CGH) { memset(CGH, Ptr, Value, NumBytes); }, CodeLoc); -} +__SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()); template void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count) { @@ -324,13 +328,9 @@ inline void mem_advise(handler &CGH, void 
*Ptr, size_t NumBytes, int Advice) { CGH.mem_advise(Ptr, NumBytes, Advice); } -inline void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice, - const sycl::detail::code_location &CodeLoc = - sycl::detail::code_location::current()) { - submit( - Q, [&](handler &CGH) { mem_advise(CGH, Ptr, NumBytes, Advice); }, - CodeLoc); -} +__SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()); inline void barrier(handler &CGH) { CGH.ext_oneapi_barrier(); } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 2b313c8834443..0d3e757c175b0 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -465,6 +465,7 @@ class __SYCL_EXPORT handler { /// /// \param Queue is a SYCL queue. /// \param IsHost indicates if this handler is created for SYCL host device. + /// TODO: Unused. Remove with ABI break. handler(std::shared_ptr Queue, bool IsHost); /// Constructs SYCL handler from the associated queue and the submission's @@ -476,10 +477,36 @@ class __SYCL_EXPORT handler { /// \param SecondaryQueue is the secondary SYCL queue of the submission. This /// is null if no secondary queue is associated with the submission. /// \param IsHost indicates if this handler is created for SYCL host device. + /// TODO: Unused. Remove with ABI break. handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost); + /// Constructs SYCL handler from queue. + /// + /// \param Queue is a SYCL queue. + /// \param IsHost indicates if this handler is created for SYCL host device. + /// \param CallerNeedsEvent indicates if the event resulting from this handler + /// is needed by the caller. + handler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent); + + /// Constructs SYCL handler from the associated queue and the submission's + /// primary and secondary queue. 
+ /// + /// \param Queue is a SYCL queue. This is equal to either PrimaryQueue or + /// SecondaryQueue. + /// \param PrimaryQueue is the primary SYCL queue of the submission. + /// \param SecondaryQueue is the secondary SYCL queue of the submission. This + /// is null if no secondary queue is associated with the submission. + /// \param IsHost indicates if this handler is created for SYCL host device. + /// \param CallerNeedsEvent indicates if the event resulting from this handler + /// is needed by the caller. + handler(std::shared_ptr Queue, + std::shared_ptr PrimaryQueue, + std::shared_ptr SecondaryQueue, bool IsHost, + bool CallerNeedsEvent); + /// Constructs SYCL handler from Graph. /// /// The hander will add the command-group as a node to the graph rather than @@ -575,6 +602,16 @@ class __SYCL_EXPORT handler { /// \return a SYCL event object representing the command group event finalize(); + /// Constructs CG object of specific type, passes it to Scheduler and + /// returns sycl::event object representing the command group. + /// It's expected that the method is the latest method executed before + /// object destruction. + /// \param CallerNeedsEvent Specifies if the caller needs an event + /// representing the work related to this handler. + /// + /// \return a SYCL event object representing the command group + event finalize(bool CallerNeedsEvent); + /// Saves streams associated with this handler. /// /// Streams are then forwarded to command group and flushed in the scheduler. 
@@ -1184,6 +1221,8 @@ class __SYCL_EXPORT handler { Size == 32 || Size == 64 || Size == 128; } + bool eventNeeded() const; + template struct TransformUserItemType { using type = std::conditional_t< std::is_convertible_v, LambdaArgType>, nd_item, diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 4d32218ab09d4..04b6969fe2b12 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -95,6 +95,12 @@ namespace ext ::oneapi ::experimental { // returned by info::queue::state enum class queue_state { executing, recording }; struct image_descriptor; + +namespace detail { +template +void submit_impl(queue &Q, CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc); +} // namespace detail } // namespace ext::oneapi::experimental /// Encapsulates a single SYCL queue which schedules kernels on a SYCL device. @@ -2689,6 +2695,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const detail::code_location &); #endif + template + friend void ext::oneapi::experimental::detail::submit_impl( + queue &Q, CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc); + /// A template-free version of submit. event submit_impl(std::function CGH, const detail::code_location &CodeLoc); @@ -2696,6 +2707,28 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { event submit_impl(std::function CGH, queue secondQueue, const detail::code_location &CodeLoc); + /// A template-free version of submit_without_event. + void submit_without_event_impl(std::function CGH, + const detail::code_location &CodeLoc); + + /// Submits a command group function object to the queue, in order to be + /// scheduled for execution on the device. + /// + /// \param CGF is a function object containing command group. 
+ /// \param CodeLoc is the code location of the submit call (default argument) + template + std::enable_if_t, void> + submit_without_event(T CGF, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); +#if __SYCL_USE_FALLBACK_ASSERT + // If post-processing is needed, fall back to the regular submit. + // TODO: Revisit whether we can avoid this. + submit(CGF, CodeLoc); +#else + submit_without_event_impl(CGF, CodeLoc); +#endif // __SYCL_USE_FALLBACK_ASSERT + } + /// Checks if the event needs to be discarded and if so, discards it and /// returns a discarded event. Otherwise, it returns input event. /// TODO: move to impl class in the next ABI Breaking window diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index ff955ce8b9eda..82c4448e37836 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -1178,8 +1178,9 @@ namespace reduction { inline void finalizeHandler(handler &CGH) { CGH.finalize(); } template void withAuxHandler(handler &CGH, FunctorTy Func) { event E = CGH.finalize(); - handler AuxHandler(CGH.MQueue, CGH.MIsHost); - AuxHandler.depends_on(E); + handler AuxHandler(CGH.MQueue, CGH.MIsHost, CGH.eventNeeded()); + if (!createSyclObjFromImpl(CGH.MQueue).is_in_order()) + AuxHandler.depends_on(E); AuxHandler.saveCodeLoc(CGH.MCodeLoc); Func(AuxHandler); CGH.MLastEvent = AuxHandler.finalize(); diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 7ef8ff587f0e2..74497db20c9f1 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -248,6 +248,7 @@ set(SYCL_COMMON_SOURCES "context.cpp" "device.cpp" "device_selector.cpp" + "enqueue_functions.cpp" "event.cpp" "exception.cpp" "exception_list.cpp" diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 3268a27fbb827..09ccef30dacd2 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -690,7 +690,8 @@ 
sycl::detail::pi::PiExtSyncPoint exec_graph_impl::enqueueNode( sycl::detail::EventImplPtr Event = sycl::detail::Scheduler::getInstance().addCG( - Node->getCGCopy(), AllocaQueue, CommandBuffer, Deps); + Node->getCGCopy(), AllocaQueue, /*EventNeeded=*/true, CommandBuffer, + Deps); MCommandMap[Node] = Event->getCommandBufferCommand(); return Event->getSyncPoint(); @@ -928,7 +929,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, CommandBuffer, nullptr, std::move(CGData)); NewEvent = sycl::detail::Scheduler::getInstance().addCG( - std::move(CommandGroup), Queue); + std::move(CommandGroup), Queue, /*EventNeeded=*/true); } NewEvent->setEventFromSubmittedExecCommandBuffer(true); } else if ((CurrentPartition->MSchedule.size() > 0) && @@ -946,7 +947,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, .MQueue = Queue; NewEvent = sycl::detail::Scheduler::getInstance().addCG( - NodeImpl->getCGCopy(), Queue); + NodeImpl->getCGCopy(), Queue, /*EventNeeded=*/true); } else { std::vector> ScheduledEvents; for (auto &NodeImpl : CurrentPartition->MSchedule) { @@ -982,7 +983,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, // dependencies are propagated in findRealDeps sycl::detail::EventImplPtr EventImpl = sycl::detail::Scheduler::getInstance().addCG( - NodeImpl->getCGCopy(), Queue); + NodeImpl->getCGCopy(), Queue, /*EventNeeded=*/true); ScheduledEvents.push_back(EventImpl); } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index f50c5c94b78d4..f0df55d5e069b 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -31,9 +31,11 @@ enum class HandlerSubmissionState : std::uint8_t { class handler_impl { public: handler_impl(std::shared_ptr SubmissionPrimaryQueue, - std::shared_ptr SubmissionSecondaryQueue) + std::shared_ptr SubmissionSecondaryQueue, + bool EventNeeded) : MSubmissionPrimaryQueue(std::move(SubmissionPrimaryQueue)), - 
MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)){}; + MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)), + MEventNeeded(EventNeeded) {}; handler_impl() = default; @@ -74,6 +76,10 @@ class handler_impl { /// submission is a fallback from a previous submission. std::shared_ptr MSubmissionSecondaryQueue; + /// Bool stores information about whether the event resulting from the + /// corresponding work is required. + bool MEventNeeded = true; + // Stores auxiliary resources used by internal operations. std::vector> MAuxiliaryResources; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 8865c342646eb..5b873039cd4a1 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -146,7 +146,8 @@ queue_impl::getExtendDependencyList(const std::vector &DepEvents, event queue_impl::memset(const std::shared_ptr &Self, void *Ptr, int Value, size_t Count, - const std::vector &DepEvents) { + const std::vector &DepEvents, + bool CallerNeedsEvent) { #if XPTI_ENABLE_INSTRUMENTATION // We need a code pointer value and we use the object ptr; if code location // information is available, we will have function name and source file @@ -173,7 +174,8 @@ event queue_impl::memset(const std::shared_ptr &Self, #endif return submitMemOpHelper( - Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, + Self, DepEvents, CallerNeedsEvent, + [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self, Count, Value); } @@ -194,7 +196,7 @@ void report(const code_location &CodeLoc) { event queue_impl::memcpy(const std::shared_ptr &Self, void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, - const code_location &CodeLoc) { + bool CallerNeedsEvent, const code_location &CodeLoc) { #if XPTI_ENABLE_INSTRUMENTATION // We need a code pointer value and we duse the object ptr; If code location // is available, we use 
the source file information along with the object @@ -225,7 +227,8 @@ event queue_impl::memcpy(const std::shared_ptr &Self, PI_ERROR_INVALID_VALUE); } return submitMemOpHelper( - Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, + Self, DepEvents, CallerNeedsEvent, + [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, [](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self, Count, Dest); } @@ -233,9 +236,10 @@ event queue_impl::memcpy(const std::shared_ptr &Self, event queue_impl::mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, - const std::vector &DepEvents) { + const std::vector &DepEvents, + bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); }, [](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr, Self, Length, Advice); @@ -244,9 +248,9 @@ event queue_impl::mem_advise(const std::shared_ptr &Self, event queue_impl::memcpyToDeviceGlobal( const std::shared_ptr &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, - const std::vector &DepEvents) { + const std::vector &DepEvents, bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope, NumBytes, Offset); @@ -260,9 +264,9 @@ event queue_impl::memcpyToDeviceGlobal( event queue_impl::memcpyFromDeviceGlobal( const std::shared_ptr &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, - size_t Offset, const std::vector &DepEvents) { + size_t Offset, const std::vector &DepEvents, bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope, NumBytes, Offset); @@ 
-348,9 +352,11 @@ event queue_impl::submit_impl(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, + bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess) { - handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue); + handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue, + CallerNeedsEvent); Handler.saveCodeLoc(Loc); { @@ -399,6 +405,7 @@ event queue_impl::submitWithHandler(const std::shared_ptr &Self, template event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, const std::vector &DepEvents, + bool CallerNeedsEvent, HandlerFuncT HandlerFunc, MemOpFuncT MemOpFunc, MemOpArgTs... MemOpArgs) { @@ -415,7 +422,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, // handler rather than by-passing the scheduler. if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass( ExpandedDepEvents, MContext)) { - if (MSupportsDiscardingPiEvents) { + if ((MDiscardEvents || !CallerNeedsEvent) && + supportsDiscardingPiEvents()) { NestedCallsTracker tracker; MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr); @@ -646,10 +654,17 @@ bool queue_impl::ext_oneapi_empty() const { // the status of the last event. if (isInOrder() && !MDiscardEvents) { std::lock_guard Lock(MMutex); - return !MDefaultGraphDeps.LastEventPtr || - MDefaultGraphDeps.LastEventPtr - ->get_info() == - info::event_command_status::complete; + // If there is no last event we know that no work has been submitted, so it + // must be trivially empty. + if (!MDefaultGraphDeps.LastEventPtr) + return true; + // Otherwise, check if the last event is finished. + // Note that we fall back to the backend query if the event was discarded, + // which may happen despite the queue not being a discard event queue.
+ if (!MDefaultGraphDeps.LastEventPtr->isDiscarded()) + return MDefaultGraphDeps.LastEventPtr + ->get_info() == + info::event_command_status::complete; } // Check the status of the backend queue if this is not a host queue. diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index d87db1d7ef2e5..db586fda5bf3e 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -111,8 +111,6 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { if (has_property()) { @@ -292,8 +290,6 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { queue_impl_interop(PiQueue); @@ -313,9 +309,7 @@ class queue_impl { MIsInorder(has_property()), MDiscardEvents( has_property()), - MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)) { + MIsProfilingEnabled(has_property()) { queue_impl_interop(PiQueue); } @@ -370,9 +364,12 @@ class queue_impl { /// \return true if this queue is a SYCL host queue. bool is_host() const { return MHostQueue; } - /// \return true if this queue has discard_events support. + /// \return true if the discard event property was set at time of creation. + bool hasDiscardEventsProperty() const { return MDiscardEvents; } + + /// \return true if this queue allows for discarded events. bool supportsDiscardingPiEvents() const { - return MSupportsDiscardingPiEvents; + return MHostQueue ? 
true : MIsInorder; } bool isInOrder() const { return MIsInorder; } @@ -424,10 +421,12 @@ class queue_impl { const SubmitPostProcessF *PostProcess = nullptr) { event ResEvent; try { - ResEvent = submit_impl(CGF, Self, Self, SecondQueue, Loc, PostProcess); + ResEvent = submit_impl(CGF, Self, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, PostProcess); } catch (...) { - ResEvent = SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, - Loc, PostProcess); + ResEvent = + SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, PostProcess); } return discard_or_return(ResEvent); } @@ -444,10 +443,19 @@ class queue_impl { const std::shared_ptr &Self, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess = nullptr) { - auto ResEvent = submit_impl(CGF, Self, Self, nullptr, Loc, PostProcess); + auto ResEvent = submit_impl(CGF, Self, Self, nullptr, + /*CallerNeedsEvent=*/true, Loc, PostProcess); return discard_or_return(ResEvent); } + void submit_without_event(const std::function &CGF, + const std::shared_ptr &Self, + const detail::code_location &Loc, + const SubmitPostProcessF *PostProcess = nullptr) { + submit_impl(CGF, Self, Self, nullptr, /*CallerNeedsEvent=*/false, Loc, + PostProcess); + } + /// Performs a blocking wait for the completion of all enqueued tasks in the /// queue. /// @@ -651,9 +659,11 @@ class queue_impl { /// \param Count is a number of bytes to fill. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing fill operation. event memset(const std::shared_ptr &Self, void *Ptr, int Value, - size_t Count, const std::vector &DepEvents); + size_t Count, const std::vector &DepEvents, + bool CallerNeedsEvent); /// Copies data from one memory region to another, both pointed by /// USM pointers. 
/// @@ -663,10 +673,11 @@ class queue_impl { /// \param Count is a number of bytes to copy. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing copy operation. event memcpy(const std::shared_ptr &Self, void *Dest, const void *Src, size_t Count, - const std::vector &DepEvents, + const std::vector &DepEvents, bool CallerNeedsEvent, const code_location &CodeLoc); /// Provides additional information to the underlying runtime about how /// different allocations are used. @@ -677,10 +688,11 @@ class queue_impl { /// \param Advice is a device-defined advice for the specified allocation. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing advise operation. event mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, - const std::vector &DepEvents); + const std::vector &DepEvents, bool CallerNeedsEvent); /// Puts exception to the list of asynchronous ecxeptions. /// @@ -718,13 +730,14 @@ class queue_impl { event memcpyToDeviceGlobal(const std::shared_ptr &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, - size_t Offset, - const std::vector &DepEvents); + size_t Offset, const std::vector &DepEvents, + bool CallerNeedsEvent); event memcpyFromDeviceGlobal(const std::shared_ptr &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, - const std::vector &DepEvents); + const std::vector &DepEvents, + bool CallerNeedsEvent); bool isProfilingFallback() { return MFallbackProfiling; } @@ -780,6 +793,14 @@ class queue_impl { // Hook to the scheduler to clean up any fusion command held on destruction. 
void cleanup_fusion_cmd(); + template + EventImplPtr insertHelperBarrier(const HandlerType &Handler) { + auto ResEvent = std::make_shared(Handler.MQueue); + getPlugin()->call( + Handler.MQueue->getHandleRef(), 0, nullptr, &ResEvent->getHandleRef()); + return ResEvent; + } + // template is needed for proper unit testing template void finalizeHandler(HandlerType &Handler, event &EventRet) { @@ -787,6 +808,10 @@ class queue_impl { // Accessing and changing of an event isn't atomic operation. // Hence, here is the lock for thread-safety. std::lock_guard Lock{MMutex}; + + auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr + : MExtGraphDeps.LastEventPtr; + // This dependency is needed for the following purposes: // - host tasks are handled by the runtime and cannot be implicitly // synchronized by the backend. @@ -794,11 +819,19 @@ class queue_impl { // by a host task. This dependency allows to build the enqueue order in // the RT but will not be passed to the backend. See getPIEvents in // Command. - - auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr - : MExtGraphDeps.LastEventPtr; - if (EventToBuildDeps) - Handler.depends_on(EventToBuildDeps); + if (EventToBuildDeps) { + // In the case where the last event was discarded and we are to run a + // host_task, we insert a barrier into the queue and use the resulting + // event as the dependency for the host_task. + // Note that host_task events can never be discarded, so this will not + // insert barriers between host_task enqueues. + if (EventToBuildDeps->isDiscarded() && + Handler.getType() == CG::CodeplayHostTask) + EventToBuildDeps = insertHelperBarrier(Handler); + + if (!EventToBuildDeps->isDiscarded()) + Handler.depends_on(EventToBuildDeps); + } // If there is an external event set, add it as a dependency and clear it. // We do not need to hold the lock as MLastEventMtx will ensure the last @@ -852,13 +885,15 @@ class queue_impl { /// same as Self. 
/// \param SecondaryQueue is a pointer to the secondary queue. This may be the /// same as Self. + /// \param CallerNeedsEvent is a boolean indicating whether the event is + /// required by the user after the call. /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. event submit_impl(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, - const detail::code_location &Loc, + bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess); /// Helper function for submitting a memory operation with a handler. @@ -876,6 +911,8 @@ class queue_impl { /// /// \param Self is a shared_ptr to this queue. /// \param DepEvents is a vector of dependencies of the operation. + /// \param CallerNeedsEvent specifies if the caller needs an event from this + /// memory operation. /// \param HandlerFunc is a function that submits the operation with a /// handler. /// \param MemMngrFunc is a function that forwards its arguments to the @@ -888,8 +925,8 @@ class queue_impl { typename... MemMngrArgTs> event submitMemOpHelper(const std::shared_ptr &Self, const std::vector &DepEvents, - HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, - MemMngrArgTs... MemOpArgs); + bool CallerNeedsEvent, HandlerFuncT HandlerFunc, + MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs); // When instrumentation is enabled emits trace event for wait begin and // returns the telemetry event generated for the wait @@ -987,12 +1024,6 @@ class queue_impl { const bool MIsProfilingEnabled; protected: - // Indicates whether the queue supports discarding PI events for tasks - // submitted to it. This condition is necessary but not sufficient, PI events - // should be discarded only if they also don't represent potential implicit - // dependencies for future tasks in other queues. 
- const bool MSupportsDiscardingPiEvents; - // Command graph which is associated with this queue for the purposes of // recording commands to it. std::weak_ptr MGraph{}; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 78c52327ff289..6ea0fc569bced 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1926,11 +1926,11 @@ static std::string_view cgTypeToString(detail::CG::CGTYPE Type) { ExecCGCommand::ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer, Dependencies), - MCommandGroup(std::move(CommandGroup)) { + MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { if (MCommandGroup->getType() == detail::CG::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); @@ -2758,11 +2758,15 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { Plugin->call(RawEvents.size(), &RawEvents[0]); } + // We can omit creating a PI event and create a "discarded" event if either + // the queue has the discard property or the command has been explicitly + // marked as not needing an event, e.g. if the user did not ask for one, and + // if the queue supports discarded PI event and there are no requirements. + bool DiscardPiEvent = (MQueue->MDiscardEvents || !MEventNeeded) && + MQueue->supportsDiscardingPiEvents() && + MCommandGroup->getRequirements().size() == 0; sycl::detail::pi::PiEvent *Event = - (MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0) - ? nullptr - : &MEvent->getHandleRef(); + DiscardPiEvent ? 
nullptr : &MEvent->getHandleRef(); sycl::detail::pi::PiExtSyncPoint OutSyncPoint; sycl::detail::pi::PiExtCommandBufferCommand OutCommand = nullptr; switch (MCommandGroup->getType()) { @@ -2909,8 +2913,13 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { auto RawEvents = getPiEvents(EventImpls); flushCrossQueueDeps(EventImpls, getWorkerQueue()); - bool DiscardPiEvent = (MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0); + // We can omit creating a PI event and create a "discarded" event if either + // the queue has the discard property or the command has been explicitly + // marked as not needing an event, e.g. if the user did not ask for one, and + // if the queue supports discarded PI event and there are no requirements. + bool DiscardPiEvent = (MQueue->MDiscardEvents || !MEventNeeded) && + MQueue->supportsDiscardingPiEvents() && + MCommandGroup->getRequirements().size() == 0; sycl::detail::pi::PiEvent *Event = DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); detail::EventImplPtr EventImpl = DiscardPiEvent ? nullptr : MEvent; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8ba0cceee9e6a..ea1a5b5111149 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -648,6 +648,7 @@ class ExecCGCommand : public Command { public: ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); @@ -672,6 +673,11 @@ class ExecCGCommand : public Command { // necessary. KernelFusionCommand *MFusionCmd = nullptr; + // MEventNeeded is true if the command needs to produce a valid event. The + // implementation may elect to not produce events (native or SYCL) if this + // is false. 
+ bool MEventNeeded = true; + bool producesPiEvent() const final; bool supportsPostEnqueueCleanup() const final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index f8c2bb27855e3..a17c45720733c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -943,14 +943,15 @@ static void combineAccessModesOfReqs(std::vector &Reqs) { Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - std::vector &ToEnqueue, + std::vector &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { std::vector &Reqs = CommandGroup->getRequirements(); std::vector &Events = CommandGroup->getEvents(); - auto NewCmd = std::make_unique( - std::move(CommandGroup), Queue, CommandBuffer, std::move(Dependencies)); + auto NewCmd = std::make_unique(std::move(CommandGroup), Queue, + EventNeeded, CommandBuffer, + std::move(Dependencies)); if (!NewCmd) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); @@ -1350,7 +1351,8 @@ Command *Scheduler::GraphBuilder::connectDepEvent( CG::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); + std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue(), + /*EventNeeded=*/true); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } @@ -1624,8 +1626,8 @@ Scheduler::GraphBuilder::completeFusion(QueueImplPtr Queue, }), FusedEventDeps.end()); - auto FusedKernelCmd = - std::make_unique(std::move(FusedCG), Queue); + auto FusedKernelCmd = std::make_unique( + std::move(FusedCG), Queue, /*EventNeeded=*/true); // Inherit auxiliary resources from fused command groups Scheduler::getInstance().takeAuxiliaryResources(FusedKernelCmd->getEvent(), diff --git 
a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 99975edb7d649..78fd300460526 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -94,7 +94,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, EventImplPtr Scheduler::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { EventImplPtr NewEvent = nullptr; const CG::CGTYPE Type = CommandGroup->getType(); @@ -130,17 +130,18 @@ EventImplPtr Scheduler::addCG( NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { - auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - DefaultHostQueue, AuxiliaryCmds); + auto Result = + MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue, + AuxiliaryCmds, EventNeeded); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; break; } default: - auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - std::move(Queue), AuxiliaryCmds, - CommandBuffer, std::move(Dependencies)); + auto Result = MGraphBuilder.addCG( + std::move(CommandGroup), std::move(Queue), AuxiliaryCmds, EventNeeded, + CommandBuffer, std::move(Dependencies)); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 9ce3d7d2a5f94..124fc1181116c 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -372,6 +372,7 @@ class Scheduler { /// /// \param CommandGroup is a unique_ptr to a command group to be added. /// \param Queue Queue that is registering the command-group. + /// \param EventNeeded Specifies whether an event is explicitly required. 
/// \param CommandBuffer Optional command buffer to enqueue to instead of /// directly to the queue. /// \param Dependencies Optional list of dependency @@ -379,6 +380,7 @@ class Scheduler { /// \return an event object to wait on for command group completion. EventImplPtr addCG(std::unique_ptr CommandGroup, const QueueImplPtr &Queue, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); @@ -602,7 +604,7 @@ class Scheduler { /// processor right away or not. GraphBuildResult addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - std::vector &ToEnqueue, + std::vector &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); diff --git a/sycl/source/enqueue_functions.cpp b/sycl/source/enqueue_functions.cpp new file mode 100644 index 0000000000000..b2e4f3f712f4b --- /dev/null +++ b/sycl/source/enqueue_functions.cpp @@ -0,0 +1,42 @@ +//==------ enqueue_functions.cpp ------- SYCL enqueue free functions -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +__SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, + const sycl::detail::code_location &CodeLoc) { + sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); + QueueImplPtr->memcpy(QueueImplPtr, Dest, Src, NumBytes, {}, + /*CallerNeedsEvent=*/false, CodeLoc); +} + +__SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes, + const sycl::detail::code_location &CodeLoc) { + sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); + QueueImplPtr->memset(QueueImplPtr, Ptr, Value, NumBytes, {}, + /*CallerNeedsEvent=*/false); +} + +__SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice, + const sycl::detail::code_location &CodeLoc) { + sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); + QueueImplPtr->mem_advise(QueueImplPtr, Ptr, NumBytes, pi_mem_advice(Advice), + {}, /*CallerNeedsEvent=*/false); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b16441e4ff146..10ce364310912 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,15 +80,29 @@ void *getValueFromDynamicParameter( } // namespace detail +/// TODO: Unused. Remove with ABI break. handler::handler(std::shared_ptr Queue, bool IsHost) - : handler(Queue, Queue, nullptr, IsHost) {} + : handler(Queue, IsHost, /*CallerNeedsEvent=*/true) {} +/// TODO: Unused. Remove with ABI break. 
handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost) + : handler(Queue, PrimaryQueue, SecondaryQueue, IsHost, + /*CallerNeedsEvent=*/true) {} + +handler::handler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : handler(Queue, Queue, nullptr, IsHost, CallerNeedsEvent) {} + +handler::handler(std::shared_ptr Queue, + std::shared_ptr PrimaryQueue, + std::shared_ptr SecondaryQueue, + bool IsHost, bool CallerNeedsEvent) : MImpl(std::make_shared(std::move(PrimaryQueue), - std::move(SecondaryQueue))), + std::move(SecondaryQueue), + CallerNeedsEvent)), MQueue(std::move(Queue)), MIsHost(IsHost) {} handler::handler( @@ -297,8 +311,9 @@ event handler::finalize() { return Result; }; - bool DiscardEvent = false; - if (MQueue->supportsDiscardingPiEvents()) { + bool DiscardEvent = (MQueue->MDiscardEvents || !MImpl->MEventNeeded) && + MQueue->supportsDiscardingPiEvents(); + if (DiscardEvent) { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && @@ -311,6 +326,9 @@ event handler::finalize() { if (PI_SUCCESS != EnqueueKernel()) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); + auto EventImpl = std::make_shared( + detail::event_impl::HES_Discarded); + MLastEvent = detail::createSyclObjFromImpl(EventImpl); } else { NewEvent = std::make_shared(MQueue); NewEvent->setWorkerQueue(MQueue); @@ -576,7 +594,7 @@ event handler::finalize() { } detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( - std::move(CommandGroup), std::move(MQueue)); + std::move(CommandGroup), std::move(MQueue), MImpl->MEventNeeded); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; @@ -1781,5 +1799,7 @@ void handler::registerDynamicParameter( } MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex); } + +bool handler::eventNeeded() const { return MImpl->MEventNeeded; } } // namespace _V1 } // namespace sycl 
diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index db3ce2f5cb1b3..9648431a5a429 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -106,39 +106,44 @@ void queue::throw_asynchronous() { impl->throw_asynchronous(); } event queue::memset(void *Ptr, int Value, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, {}); + return impl->memset(impl, Ptr, Value, Count, {}, /*CallerNeedsEvent=*/true); } event queue::memset(void *Ptr, int Value, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, {DepEvent}); + return impl->memset(impl, Ptr, Value, Count, {DepEvent}, + /*CallerNeedsEvent=*/true); } event queue::memset(void *Ptr, int Value, size_t Count, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, DepEvents); + return impl->memset(impl, Ptr, Value, Count, DepEvents, + /*CallerNeedsEvent=*/true); } event queue::memcpy(void *Dest, const void *Src, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, {}, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, {}, /*CallerNeedsEvent=*/true, + CodeLoc); } event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, {DepEvent}, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, {DepEvent}, + /*CallerNeedsEvent=*/true, CodeLoc); } event queue::memcpy(void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return 
impl->memcpy(impl, Dest, Src, Count, DepEvents, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, DepEvents, + /*CallerNeedsEvent=*/true, CodeLoc); } event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, @@ -150,20 +155,23 @@ event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {}); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {}, + /*CallerNeedsEvent=*/true); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {DepEvent}); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {DepEvent}, + /*CallerNeedsEvent=*/true); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents, + /*CallerNeedsEvent=*/true); } event queue::discard_or_return(const event &Event) { @@ -184,6 +192,11 @@ event queue::submit_impl(std::function CGH, queue SecondQueue, return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); } +void queue::submit_without_event_impl(std::function CGH, + const detail::code_location &CodeLoc) { + return impl->submit_without_event(CGH, impl, CodeLoc); +} + event queue::submit_impl_and_postprocess( std::function CGH, const detail::code_location &CodeLoc, const SubmitPostProcessF &PostProcess) { @@ -225,8 +238,13 @@ getBarrierEventForInorderQueueHelper(const 
detail::QueueImplPtr QueueImpl) { /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) { - if (is_in_order() && !impl->getCommandGraph() && !impl->MIsProfilingEnabled) - return getBarrierEventForInorderQueueHelper(impl); + if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents && + !impl->MIsProfilingEnabled) { + event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); + // If the last event was discarded, fall back to enqueuing a barrier. + if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) + return InOrderLastEvent; + } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc); } @@ -247,9 +265,13 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, auto EventImpl = detail::getSyclObjImpl(Event); return !EventImpl->isContextInitialized() || EventImpl->isNOP(); }); - if (is_in_order() && !impl->getCommandGraph() && !impl->MIsProfilingEnabled && - AllEventsEmptyOrNop) - return getBarrierEventForInorderQueueHelper(impl); + if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents && + !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) { + event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); + // If the last event was discarded, fall back to enqueuing a barrier. 
+ if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) + return InOrderLastEvent; + } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }, CodeLoc); @@ -321,7 +343,7 @@ event queue::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, const std::vector &DepEvents) { return impl->memcpyToDeviceGlobal(impl, DeviceGlobalPtr, Src, IsDeviceImageScope, NumBytes, Offset, - DepEvents); + DepEvents, /*CallerNeedsEvent=*/true); } event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, @@ -330,7 +352,7 @@ event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, const std::vector &DepEvents) { return impl->memcpyFromDeviceGlobal(impl, Dest, DeviceGlobalPtr, IsDeviceImageScope, NumBytes, Offset, - DepEvents); + DepEvents, /*CallerNeedsEvent=*/true); } bool queue::device_has(aspect Aspect) const { diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6391a69978a56..2c97a01f87da7 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2988,6 +2988,7 @@ _ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE0EE _ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE1EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISE_EEEEES8_IhSaIhEERKSE_DpRKT_ _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE +_ZN4sycl3_V13ext6oneapi12experimental10mem_adviseENS0_5queueEPvmiRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi10level_zero6detail11make_deviceERKNS0_8platformEm _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE @@ -3088,6 +3089,8 @@ 
_ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_5 _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC2ERKNS0_5queueERKNS0_13property_listE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC2ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE +_ZN4sycl3_V13ext6oneapi12experimental6memcpyENS0_5queueEPvPKvmRKNS0_6detail13code_locationE +_ZN4sycl3_V13ext6oneapi12experimental6memsetENS0_5queueEPvimRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi12experimental9image_memC1ERKNS3_16image_descriptorERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental9image_memC1ERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental9image_memC2ERKNS3_16image_descriptorERKNS0_5queueE @@ -3127,6 +3130,7 @@ _ZN4sycl3_V15queue20wait_and_throw_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue22memcpyFromDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKSt6vectorINS0_5eventESaIS3_EERKNS0_6detail13code_locationE +_ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN4sycl3_V15queue29ext_oneapi_set_external_eventERKNS0_5eventE @@ -3526,10 +3530,14 @@ _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b 
+_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb +_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b +_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEb +_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17samplerC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V17samplerC1EP11_cl_samplerRKNS0_7contextE _ZN4sycl3_V17samplerC2ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE @@ -4108,6 +4116,7 @@ _ZNK4sycl3_V17context8get_infoINS0_4info7context32atomic_memory_scope_capabiliti _ZNK4sycl3_V17context8get_infoINS0_4info7context7devicesEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context8get_infoINS0_4info7context8platformEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context9getNativeEv +_ZNK4sycl3_V17handler11eventNeededEv _ZNK4sycl3_V17handler15getCommandGraphEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index bcfdab110778d..f27a5bbab639c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -570,7 +570,9 @@ ??0half@host_half_impl@detail@_V1@sycl@@QEAA@G@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N1@Z 
??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N1@Z ??0host_selector@_V1@sycl@@QEAA@$$QEAV012@@Z ??0host_selector@_V1@sycl@@QEAA@AEBV012@@Z ??0host_selector@_V1@sycl@@QEAA@XZ @@ -3968,6 +3970,7 @@ ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEAVqueue@67@@Z ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@@Z ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXXZ +?eventNeeded@handler@_V1@sycl@@AEBA_NXZ ?ext_codeplay_supports_fusion@queue@_V1@sycl@@QEBA_NXZ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z @@ -4329,11 +4332,13 @@ ?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?markBufferAsInternal@detail@_V1@sycl@@YAXAEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z +?mem_advise@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAX_KHAEBUcode_location@detail@45@@Z ?mem_advise@handler@_V1@sycl@@QEAAXPEBX_KH@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHV423@AEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KW4_pi_mem_advice@@AEBUcode_location@detail@23@@Z 
+?memcpy@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAXPEBX_KAEBUcode_location@detail@45@@Z ?memcpy@handler@_V1@sycl@@QEAAXPEAXPEBX_K@Z ?memcpy@queue@_V1@sycl@@QEAA?AVevent@23@PEAXPEBX_KAEBUcode_location@detail@23@@Z ?memcpy@queue@_V1@sycl@@QEAA?AVevent@23@PEAXPEBX_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z @@ -4344,6 +4349,7 @@ ?memcpyToDeviceGlobal@handler@_V1@sycl@@AEAAXPEBX0_N_K2@Z ?memcpyToDeviceGlobal@queue@_V1@sycl@@AEAA?AVevent@23@PEAXPEBX_N_K3AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z ?memcpyToHostOnlyDeviceGlobal@handler@_V1@sycl@@AEAAXPEBX0_K_N11@Z +?memset@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAXH_KAEBUcode_location@detail@45@@Z ?memset@handler@_V1@sycl@@QEAAXPEAXH_K@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBUcode_location@detail@23@@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z @@ -4429,6 +4435,7 @@ ?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z +?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z ?supportsUSMFill2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemcpy2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemset2D@handler@_V1@sycl@@AEAA_NXZ diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index 491fa49225a81..ee39a80625d85 100644 --- 
a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -10,6 +10,8 @@ add_sycl_unittest(ExtensionsTests OBJECT USMP2P.cpp CompositeDevice.cpp OneAPIProd.cpp + EnqueueFunctionsEvents.cpp + DiscardEvent.cpp ) add_subdirectory(CommandGraph) diff --git a/sycl/unittests/Extensions/DiscardEvent.cpp b/sycl/unittests/Extensions/DiscardEvent.cpp new file mode 100644 index 0000000000000..dc729c74084e0 --- /dev/null +++ b/sycl/unittests/Extensions/DiscardEvent.cpp @@ -0,0 +1,83 @@ +//==------------------------- DiscardEvent.cpp -----------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include +#include +#include + +using namespace sycl; + +namespace oneapiext = ext::oneapi::experimental; + +namespace { + +thread_local size_t counter_piEnqueueKernelLaunch = 0; +inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piEnqueueKernelLaunch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; +thread_local std::chrono::time_point + timestamp_piEnqueueEventsWaitWithBarrier; +inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, + const pi_event *, + pi_event *) { + ++counter_piEnqueueEventsWaitWithBarrier; + timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); + return PI_SUCCESS; +} + +class DiscardEventTests : public ::testing::Test { +public: + DiscardEventTests() + : Mock{}, Q{context(Mock.getPlatform()), default_selector_v, + property::queue::in_order{}} {} + +protected: + void SetUp() 
override { + counter_piEnqueueKernelLaunch = 0; + counter_piEnqueueEventsWaitWithBarrier = 0; + } + + unittest::PiMock Mock; + queue Q; +}; + +TEST_F(DiscardEventTests, BarrierBeforeHostTask) { + // Special test for case where host_task needs an event after, so a barrier is + // enqueued to create a usable event. + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter( + after_piEnqueueEventsWaitWithBarrier); + + oneapiext::single_task>(Q, []() {}); + + std::chrono::time_point HostTaskTimestamp; + Q.submit([&](handler &CGH) { + CGH.host_task( + [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); + }).wait(); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, size_t{1}); + ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); +} + +} // namespace diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp new file mode 100644 index 0000000000000..842e3cf271216 --- /dev/null +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -0,0 +1,474 @@ +//==-------------------- EnqueueFunctionsEvents.cpp ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Tests the behavior of enqueue free functions when events can be discarded.
+ +#include +#include + +#include + +#include +#include +#include +#include + +using namespace sycl; + +namespace oneapiext = ext::oneapi::experimental; + +namespace { + +inline pi_result after_piKernelGetInfo(pi_kernel kernel, + pi_kernel_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + constexpr char MockKernel[] = "TestKernel"; + if (param_name == PI_KERNEL_INFO_FUNCTION_NAME) { + if (param_value) { + assert(param_value_size == sizeof(MockKernel)); + std::memcpy(param_value, MockKernel, sizeof(MockKernel)); + } + if (param_value_size_ret) + *param_value_size_ret = sizeof(MockKernel); + } + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueKernelLaunch = 0; +inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piEnqueueKernelLaunch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemcpy = 0; +inline pi_result redefined_piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *, + const void *, size_t, + pi_uint32, const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueueMemcpy; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemset = 0; +inline pi_result redefined_piextUSMEnqueueMemset(pi_queue, void *, pi_int32, + size_t, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueueMemset; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueuePrefetch = 0; +inline pi_result redefined_piextUSMEnqueuePrefetch(pi_queue, const void *, + size_t, + pi_usm_migration_flags, + pi_uint32, const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueuePrefetch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemAdvise = 0; +inline pi_result 
redefined_piextUSMEnqueueMemAdvise(pi_queue, const void *, + size_t, pi_mem_advice, + pi_event *event) { + ++counter_piextUSMEnqueueMemAdvise; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; +thread_local std::chrono::time_point + timestamp_piEnqueueEventsWaitWithBarrier; +inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, + const pi_event *, + pi_event *) { + ++counter_piEnqueueEventsWaitWithBarrier; + timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); + return PI_SUCCESS; +} + +class EnqueueFunctionsEventsTests : public ::testing::Test { +public: + EnqueueFunctionsEventsTests() + : Mock{}, Q{context(Mock.getPlatform()), default_selector_v, + property::queue::in_order{}} {} + +protected: + void SetUp() override { + counter_piEnqueueKernelLaunch = 0; + counter_piextUSMEnqueueMemcpy = 0; + counter_piextUSMEnqueueMemset = 0; + counter_piextUSMEnqueuePrefetch = 0; + counter_piextUSMEnqueueMemAdvise = 0; + counter_piEnqueueEventsWaitWithBarrier = 0; + } + + unittest::PiMock Mock; + queue Q; +}; + +TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::single_task>(CGH, []() {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::single_task>(Q, []() {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, + 
[&](handler &CGH) { oneapiext::single_task(CGH, Kernel); }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + oneapiext::single_task(Q, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::parallel_for>(CGH, range<1>{32}, [](item<1>) {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::parallel_for>(Q, range<1>{32}, [](item<1>) {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::parallel_for(CGH, range<1>{32}, Kernel); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + 
oneapiext::parallel_for(Q, range<1>{32}, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::nd_launch>( + CGH, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::nd_launch>(Q, nd_range<1>{range<1>{32}, range<1>{32}}, + [](nd_item<1>) {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::nd_launch(CGH, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + 
oneapiext::memcpy(CGH, Src, Dst, sizeof(int) * N); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::memcpy(Q, Src, Dst, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, + [&](handler &CGH) { oneapiext::copy(CGH, Dst, Src, N); }); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::memcpy(Q, Dst, Src, N); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemset); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::memset(CGH, Dst, int{1}, sizeof(int) * N); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemset); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::memset(Q, Dst, 1, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) { + 
Mock.redefine( + redefined_piextUSMEnqueuePrefetch); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit( + Q, [&](handler &CGH) { oneapiext::prefetch(CGH, Dst, sizeof(int) * N); }); + + ASSERT_EQ(counter_piextUSMEnqueuePrefetch, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueuePrefetch); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::prefetch(Q, Dst, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueuePrefetch, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemAdvise); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::mem_advise(CGH, Dst, sizeof(int) * N, 1); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemAdvise); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::mem_advise(Q, Dst, sizeof(int) * N, 1); + + ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { + // Special test for case where host_task needs an event after, so a barrier is + // enqueued to create a usable event.
+ Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter( + after_piEnqueueEventsWaitWithBarrier); + + oneapiext::single_task>(Q, []() {}); + + std::chrono::time_point HostTaskTimestamp; + Q.submit([&](handler &CGH) { + CGH.host_task( + [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); + }).wait(); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, size_t{1}); + ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); +} + +} // namespace diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 56803e7eab5bb..ca29b9bd6aa1e 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1023,7 +1023,8 @@ inline pi_result mock_piEnqueueKernelLaunch( const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1032,7 +1033,8 @@ inline pi_result mock_piextEnqueueCooperativeKernelLaunch( const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1040,14 +1042,16 @@ inline pi_result mock_piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } inline pi_result mock_piEnqueueEventsWaitWithBarrier( pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = 
createDummyHandle(); return PI_SUCCESS; } @@ -1056,7 +1060,8 @@ mock_piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1067,7 +1072,8 @@ inline pi_result mock_piEnqueueMemBufferReadRect( size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1076,7 +1082,8 @@ mock_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1087,7 +1094,8 @@ inline pi_result mock_piEnqueueMemBufferWriteRect( size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1097,7 +1105,8 @@ mock_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1108,7 +1117,8 @@ inline pi_result mock_piEnqueueMemBufferCopyRect( size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); 
return PI_SUCCESS; } @@ -1119,7 +1129,8 @@ inline pi_result mock_piEnqueueMemBufferFill(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1128,7 +1139,8 @@ inline pi_result mock_piEnqueueMemImageRead( pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1139,7 +1151,8 @@ mock_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1149,7 +1162,8 @@ mock_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1159,7 +1173,8 @@ mock_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1170,7 +1185,8 @@ inline pi_result mock_piEnqueueMemBufferMap(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); auto parentDummyHandle = reinterpret_cast(buffer); *ret_map = (void *)(parentDummyHandle->MData); @@ -1182,7 +1198,8 @@ inline pi_result 
mock_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1247,7 +1264,8 @@ inline pi_result mock_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1257,7 +1275,8 @@ inline pi_result mock_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1267,7 +1286,8 @@ inline pi_result mock_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1275,7 +1295,8 @@ inline pi_result mock_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1318,7 +1339,8 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableWrite( pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1326,7 +1348,8 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableRead( pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = 
createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1526,7 +1549,8 @@ inline pi_result mock_piextEnqueueReadHostPipe( pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1534,7 +1558,8 @@ inline pi_result mock_piextEnqueueWriteHostPipe( pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 35e353780d450..b4d4e7cdb7535 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -111,7 +111,7 @@ class MockHandler : public sycl::handler { public: MockHandler(std::shared_ptr Queue) - : sycl::handler(Queue, /* IsHost */ false) {} + : sycl::handler(Queue, /* IsHost */ false, /*CallerNeedsEvent*/ true) {} std::unique_ptr finalize() { auto CGH = static_cast(this); diff --git a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp index 473ddcefe75fa..bf200ec660fb9 100644 --- a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp +++ b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp @@ -21,7 +21,8 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) { std::vector ToEnqueue; - MockHandlerCustomFinalize MockCGH(QueueDevImpl, false); + MockHandlerCustomFinalize MockCGH(QueueDevImpl, false, + /*CallerNeedsEvent=*/true); sycl::accessor B; @@ -33,8 +34,8 @@ TEST_F(SchedulerTest, 
AccDefaultCtorDoesntAffectDepGraph) { std::unique_ptr CmdGroup = MockCGH.finalize(); - detail::Command *NewCmd = - MS.addCG(std::move(CmdGroup), QueueDevImpl, ToEnqueue); + detail::Command *NewCmd = MS.addCG(std::move(CmdGroup), QueueDevImpl, + ToEnqueue, /*EventNeeded=*/true); // if MDeps is empty, accessor built from default ctor does not affect // dependency graph in accordance with SYCL 2020 diff --git a/sycl/unittests/scheduler/Commands.cpp b/sycl/unittests/scheduler/Commands.cpp index a995800643421..bd0df10d1309a 100644 --- a/sycl/unittests/scheduler/Commands.cpp +++ b/sycl/unittests/scheduler/Commands.cpp @@ -81,6 +81,7 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { std::unique_ptr CommandGroup(new detail::CGBarrier( std::move(Arg), detail::CG::StorageInitHelper({}, {}, {}, {}, {}), detail::CG::CGTYPE::BarrierWaitlist, {})); - MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl); + MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl, + /*EventNeeded=*/true); } } diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 414f58c6f177c..2e54057e434d6 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -60,7 +60,8 @@ class DependsOnTests : public ::testing::Test { std::vector ToEnqueue; // Emulating processing of command group function - MockHandlerCustomFinalize MockCGH(QueueDevImpl, false); + MockHandlerCustomFinalize MockCGH(QueueDevImpl, false, + /*CallerNeedsEvent=*/true); for (auto EventImpl : Events) MockCGH.depends_on(detail::createSyclObjFromImpl(EventImpl)); @@ -84,7 +85,7 @@ class DependsOnTests : public ::testing::Test { detail::Command *NewCmd = MS.addCG( std::move(CmdGroup), Type == TestCGType::HOST_TASK ? 
MS.getDefaultHostQueue() : QueueDevImpl, - ToEnqueue); + ToEnqueue, /*EventNeeded=*/true); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 9bf4e37eea0db..8206728b2b221 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -106,7 +106,8 @@ static void checkCleanupOnEnqueue(MockScheduler &MS, /*SharedPtrStorage*/ {}, /*Requirements*/ {&MockReq}, /*Events*/ {}))}; - detail::EventImplPtr Event = MS.addCG(std::move(CG), QueueImpl); + detail::EventImplPtr Event = + MS.addCG(std::move(CG), QueueImpl, /*EventNeeded=*/true); auto *Cmd = static_cast(Event->getCommand()); verifyCleanup(Record, AllocaCmd, MockCmd, CommandDeleted); @@ -330,7 +331,8 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) { AttachSchedulerWrapper AttachScheduler{MSPtr}; detail::EventImplPtr EventImplPtr; { - MockHandlerCustomFinalize MockCGH(QueueImplPtr, false); + MockHandlerCustomFinalize MockCGH(QueueImplPtr, false, + /*CallerNeedsEvent=*/true); kernel_bundle KernelBundle = sycl::get_kernel_bundle( QueueImplPtr->get_context()); @@ -341,7 +343,8 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) { MockCGH.single_task>([] {}); std::unique_ptr CG = MockCGH.finalize(); - EventImplPtr = MSPtr->addCG(std::move(CG), QueueImplPtr); + EventImplPtr = + MSPtr->addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true); } // The buffers should have been released with graph cleanup once the work is @@ -391,7 +394,8 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) { detail::EventImplPtr EventImplPtr; bool MockAuxResourceDeleted = false; { - MockHandlerCustomFinalize MockCGH(QueueImplPtr, false); + MockHandlerCustomFinalize MockCGH(QueueImplPtr, false, + /*CallerNeedsEvent=*/true); kernel_bundle KernelBundle = sycl::get_kernel_bundle( QueueImplPtr->get_context()); @@ -410,7 +414,8 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) { 
MockCGH.single_task>([] {}); std::unique_ptr CG = MockCGH.finalize(); - EventImplPtr = MSPtr->addCG(std::move(CG), QueueImplPtr); + EventImplPtr = + MSPtr->addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true); } EventCompleted = false; diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index ab37a667fc90a..3f97ffb003adc 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -19,10 +19,21 @@ using namespace sycl; using ::testing::An; +class MockQueueImpl : public sycl::detail::queue_impl { +public: + MockQueueImpl(const sycl::detail::DeviceImplPtr &Device, + const sycl::async_handler &AsyncHandler, + const sycl::property_list &PropList) + : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} + using sycl::detail::queue_impl::finalizeHandler; +}; + // Define type with the only methods called by finalizeHandler class LimitedHandler { public: - LimitedHandler(sycl::detail::CG::CGTYPE CGType) : MCGType(CGType) {} + LimitedHandler(sycl::detail::CG::CGTYPE CGType, + std::shared_ptr Queue) + : MCGType(CGType), MQueue(Queue) {} virtual ~LimitedHandler() {} virtual void depends_on(const sycl::detail::EventImplPtr &) {} @@ -38,14 +49,16 @@ class LimitedHandler { sycl::detail::CG::CGTYPE getType() { return MCGType; } sycl::detail::CG::CGTYPE MCGType; + std::shared_ptr MQueue; }; // Needed to use EXPECT_CALL to verify depends_on that originally appends lst // event as dependency to the new CG class LimitedHandlerSimulation : public LimitedHandler { public: - LimitedHandlerSimulation(sycl::detail::CG::CGTYPE CGType) - : LimitedHandler(CGType) {} + LimitedHandlerSimulation(sycl::detail::CG::CGTYPE CGType, + std::shared_ptr Queue) + : LimitedHandler(CGType, Queue) {} MOCK_METHOD1(depends_on, void(const sycl::detail::EventImplPtr &)); MOCK_METHOD1(depends_on, void(event Event)); @@ -53,15 +66,6 @@ class LimitedHandlerSimulation : public 
LimitedHandler { void(const std::vector &Events)); }; -class MockQueueImpl : public sycl::detail::queue_impl { -public: - MockQueueImpl(const sycl::detail::DeviceImplPtr &Device, - const sycl::async_handler &AsyncHandler, - const sycl::property_list &PropList) - : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} - using sycl::detail::queue_impl::finalizeHandler; -}; - // Only check events dependency in queue_impl::finalizeHandler TEST_F(SchedulerTest, InOrderQueueSyncCheck) { sycl::unittest::PiMock Mock; @@ -76,13 +80,15 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { // previous task, this is needed to properly sync blocking & blocked tasks. sycl::event Event; { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask}; + LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(0); Queue->finalizeHandler(MockCGH, Event); } { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask}; + LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(1); Queue->finalizeHandler(MockCGH, Event); diff --git a/sycl/unittests/scheduler/KernelFusion.cpp b/sycl/unittests/scheduler/KernelFusion.cpp index 8b45c03e37f1f..1db16cbda1493 100644 --- a/sycl/unittests/scheduler/KernelFusion.cpp +++ b/sycl/unittests/scheduler/KernelFusion.cpp @@ -22,7 +22,8 @@ template detail::Command *CreateTaskCommand(MockScheduler &MS, detail::QueueImplPtr DevQueue, buffer &buf) { - MockHandlerCustomFinalize MockCGH(DevQueue, false); + MockHandlerCustomFinalize MockCGH(DevQueue, false, + /*CallerNeedsEvent=*/true); auto acc = buf.get_access(static_cast(MockCGH)); @@ -36,7 +37,8 @@ detail::Command *CreateTaskCommand(MockScheduler &MS, auto CmdGrp = MockCGH.finalize(); std::vector ToEnqueue; - detail::Command *NewCmd = MS.addCG(std::move(CmdGrp), DevQueue, ToEnqueue); + detail::Command *NewCmd = + MS.addCG(std::move(CmdGrp), 
DevQueue, ToEnqueue, /*EventNeeded=*/true); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index c97428b9d55c6..c27e4d672e0fa 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -147,7 +147,8 @@ TEST_F(SchedulerTest, QueueFlushing) { /*SharedPtrStorage*/ {}, /*Requirements*/ {}, /*Events*/ {}))}; - detail::ExecCGCommand ExecCGCmd{std::move(CG), QueueImplA}; + detail::ExecCGCommand ExecCGCmd{std::move(CG), QueueImplA, + /*EventNeeded=*/true}; MockReq.MDims = 1; (void)ExecCGCmd.addDep(detail::DepDesc(&AllocaCmd, &MockReq, &AllocaCmd), ToCleanUp); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 1d7fa2075d0da..b1c667c5c40ca 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -194,11 +194,12 @@ class MockScheduler : public sycl::detail::Scheduler { return MGraphBuilder.addEmptyCmd(Cmd, Reqs, Queue, Reason, ToEnqueue); } - sycl::detail::Command * - addCG(std::unique_ptr CommandGroup, - sycl::detail::QueueImplPtr Queue, - std::vector &ToEnqueue) { - return MGraphBuilder.addCG(std::move(CommandGroup), Queue, ToEnqueue) + sycl::detail::Command *addCG(std::unique_ptr CommandGroup, + sycl::detail::QueueImplPtr Queue, + std::vector &ToEnqueue, + bool EventNeeded) { + return MGraphBuilder + .addCG(std::move(CommandGroup), Queue, ToEnqueue, EventNeeded) .NewCmd; } @@ -225,8 +226,9 @@ sycl::detail::Requirement getMockRequirement(const MemObjT &MemObj) { class MockHandler : public sycl::handler { public: - MockHandler(std::shared_ptr Queue, bool IsHost) - : sycl::handler(Queue, IsHost) {} + MockHandler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : sycl::handler(Queue, IsHost, CallerNeedsEvent) {} // Methods using sycl::handler::addReduction; using sycl::handler::getType; 
@@ -292,8 +294,8 @@ class MockHandler : public sycl::handler { class MockHandlerCustomFinalize : public MockHandler { public: MockHandlerCustomFinalize(std::shared_ptr Queue, - bool IsHost) - : MockHandler(Queue, IsHost) {} + bool IsHost, bool CallerNeedsEvent) + : MockHandler(Queue, IsHost, CallerNeedsEvent) {} std::unique_ptr finalize() { std::unique_ptr CommandGroup; diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 4564028110341..7e76027c05431 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -20,8 +20,9 @@ inline constexpr auto DisableCleanupName = class MockHandlerStreamInit : public MockHandler { public: - MockHandlerStreamInit(std::shared_ptr Queue, bool IsHost) - : MockHandler(Queue, IsHost) {} + MockHandlerStreamInit(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : MockHandler(Queue, IsHost, CallerNeedsEvent) {} std::unique_ptr finalize() { std::unique_ptr CommandGroup; switch (getType()) { @@ -85,7 +86,7 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { /*PropList=*/{})); // Emulating processing of command group function - MockHandlerStreamInit MockCGH(HQueueImpl, true); + MockHandlerStreamInit MockCGH(HQueueImpl, true, /*CallerNeedsEvent=*/true); MockCGH.setType(detail::CG::Kernel); auto EmptyKernel = [](sycl::nd_item<1>) {}; @@ -118,7 +119,8 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { MockScheduler MS; std::vector AuxCmds; - detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds); + detail::Command *NewCmd = + MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds, /*EventNeeded=*/true); ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler"; ASSERT_GT(NewCmd->MDeps.size(), 0u) << "No deps appeared in the new exec kernel command"; From 8a445538d182bd7f31706d7afc515b2826105cc6 Mon Sep 17 00:00:00 2001 From: Lorenc Bushi 
<113361374+lbushi25@users.noreply.github.com> Date: Mon, 1 Jul 2024 08:40:49 -0400 Subject: [PATCH 31/40] [SYCL] Fix assertion failure in E2E marray test (#14234) This PR fixes a GPU accuracy bug by upscaling the error-tolerance to a double type if the GPU supports 64-bit floating point arithmetic. --- sycl/test-e2e/Basic/built-ins/helpers.hpp | 28 ++++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/sycl/test-e2e/Basic/built-ins/helpers.hpp b/sycl/test-e2e/Basic/built-ins/helpers.hpp index 03a7c720e9afd..724e417c4d6e0 100644 --- a/sycl/test-e2e/Basic/built-ins/helpers.hpp +++ b/sycl/test-e2e/Basic/built-ins/helpers.hpp @@ -33,16 +33,28 @@ void test(bool CheckDevice, double delta, FuncTy F, ExpectedTy Expected, sycl::buffer SuccessBuf{1}; + sycl::queue q; + sycl::device dev = q.get_device(); // Make sure we don't use fp64 on devices that don't support it. - sycl::detail::get_elem_type_t d(delta); - - sycl::queue{}.submit([&](sycl::handler &cgh) { + const bool fp64 = dev.has(sycl::aspect::fp64); + q.submit([&](sycl::handler &cgh) { sycl::accessor Success{SuccessBuf, cgh}; - cgh.single_task([=]() { - auto R = F(Args...); - static_assert(std::is_same_v); - Success[0] = equal(R, Expected, d); - }); + if (fp64) { + cgh.single_task([=]() { + auto R = F(Args...); + static_assert(std::is_same_v); + // use double precision error tolerance when fp64 supported + Success[0] = equal(R, Expected, delta); + }); + } else { + // downscale the error tolerance when fp64 is not supported + sycl::detail::get_elem_type_t d(delta); + cgh.single_task([=]() { + auto R = F(Args...); + static_assert(std::is_same_v); + Success[0] = equal(R, Expected, d); + }); + } }); assert(sycl::host_accessor{SuccessBuf}[0]); } From 6271db86e9b62bbda8f5e0487a8da8df8db2e382 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 14:37:31 +0000 Subject: [PATCH 32/40] Bump the github-actions group with 2 updates 
(#14365) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps the github-actions group with 2 updates: [github/codeql-action](https://github.com/github/codeql-action) and [softprops/action-gh-release](https://github.com/softprops/action-gh-release). Updates `github/codeql-action` from 3.25.7 to 3.25.11
Changelog

Sourced from github/codeql-action's changelog.

CodeQL Action Changelog

See the releases page for the relevant changes to the CodeQL CLI and language packs.

Note that the only difference between v2 and v3 of the CodeQL Action is the node version they support, with v3 running on node 20 while we continue to release v2 to support running on node 16. For example 3.22.11 was the first v3 release and is functionally identical to 2.22.11. This approach ensures an easy way to track exactly which features are included in different versions, indicated by the minor and patch version numbers.

[UNRELEASED]

No user facing changes.

3.25.11 - 28 Jun 2024

  • Avoid failing the workflow run if there is an error while uploading debug artifacts. #2349
  • Update default CodeQL bundle version to 2.17.6. #2352

3.25.10 - 13 Jun 2024

  • Update default CodeQL bundle version to 2.17.5. #2327

3.25.9 - 12 Jun 2024

  • Avoid failing database creation if the database folder already exists and contains some unexpected files. Requires CodeQL 2.18.0 or higher. #2330
  • The init Action will attempt to clean up the database cluster directory before creating a new database and at the end of the job. This will help to avoid issues where the database cluster directory is left in an inconsistent state. #2332

3.25.8 - 04 Jun 2024

  • Update default CodeQL bundle version to 2.17.4. #2321

3.25.7 - 31 May 2024

  • We are rolling out a feature in May/June 2024 that will reduce the Actions cache usage of the Action by keeping only the newest TRAP cache for each language. #2306

3.25.6 - 20 May 2024

  • Update default CodeQL bundle version to 2.17.3. #2295

3.25.5 - 13 May 2024

  • Add a compatibility matrix of supported CodeQL Action, CodeQL CLI, and GitHub Enterprise Server versions to the https://github.com/github/codeql-action/blob/main/README.md. #2273
  • Avoid printing out a warning for a missing on.push trigger when the CodeQL Action is triggered via a workflow_call event. #2274
  • The tools: latest input to the init Action has been renamed to tools: linked. This option specifies that the Action should use the tools shipped at the same time as the Action. The old name will continue to work for backwards compatibility, but we recommend that new workflows use the new name. #2281

3.25.4 - 08 May 2024

  • Update default CodeQL bundle version to 2.17.2. #2270

3.25.3 - 25 Apr 2024

  • Update default CodeQL bundle version to 2.17.1. #2247
  • Workflows running on macos-latest using CodeQL CLI versions before v2.15.1 will need to either upgrade their CLI version to v2.15.1 or newer, or change the platform to an Intel MacOS runner, such as macos-12. ARM machines with SIP disabled, including the newest macos-latest image, are unsupported for CLI versions before 2.15.1. #2261

... (truncated)

Commits
  • b611370 Merge pull request #2357 from github/update-v3.25.11-de945755c
  • 3e6431f Update changelog for v3.25.11
  • de94575 Merge pull request #2352 from github/update-bundle/codeql-bundle-v2.17.6
  • a32d305 Add changelog note
  • 9ccc995 Update default bundle to codeql-bundle-v2.17.6
  • 9b7c22c Merge pull request #2351 from github/dependabot/npm_and_yarn/npm-6791eaa26c
  • 9cf3243 Rebuild
  • 1895b29 Update checked-in dependencies
  • 9dcfde9 Bump the npm group with 2 updates
  • 8723b5b Merge pull request #2350 from github/angelapwen/add-exclude-pr-check-param
  • Additional commits viewable in compare view

Updates `softprops/action-gh-release` from 2.0.5 to 2.0.6
Release notes

Sourced from softprops/action-gh-release's releases.

v2.0.6

maintenance release with updated dependencies

Changelog

Sourced from softprops/action-gh-release's changelog.

2.0.6

  • maintenance release with updated dependencies

2.0.5

2.0.4

2.0.3

  • Declare make_latest as an input field in action.yml #419

2.0.2

  • Revisit approach to #384 making unresolved pattern failures opt-in #417

2.0.1

2.0.0

  • 2.0.0!? this release corrects a disjunction between git tag versions used in the marketplace and versions listed in this file. Previous versions should have really been 1.*. Going forward this should be better aligned.
  • Upgrade action.yml declaration to node20 to address deprecations

0.1.15

  • Upgrade to action.yml declaration to node16 to address deprecations
  • Upgrade dependencies
  • Add asset output as a JSON array containing information about the uploaded assets

0.1.14

  • provides a new workflow input option generate_release_notes which when set to true will automatically generate release notes for you based on GitHub activity #179. Please see the GitHub docs for this feature for more information

0.1.13

  • fix issue with multiple runs concatenating release bodies #145

0.1.12

  • fix bug leading to empty strings substituted for inputs users don't provide breaking api calls #144

... (truncated)

Commits
  • a74c6b7 update changelog
  • b909f76 update dist/index.js
  • e49d08f chore(deps): bump glob from 8.0.3 to 10.4.2
  • f12ad25 chore(deps): bump @​octokit/plugin-throttling from 4.3.2 to 9.3.0
  • 7039a82 chore: release 2.0.6
  • f9c2b6c chore: update deps and run build
  • 73738a6 chore(deps): bump node dep and @types/node
  • a500a35 Bump ts-jest from 29.0.3 to 29.1.4 (#459)
  • See full diff in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore <dependency name> major version` will close this group update PR and stop Dependabot creating any more for the specific dependency's major version (unless you unignore this specific dependency's major version or upgrade to it yourself) - `@dependabot ignore <dependency name> minor version` will close this group update PR and stop Dependabot creating any more for the specific dependency's minor version (unless you unignore this specific dependency's minor version or upgrade to it yourself) - `@dependabot ignore <dependency name>` will close this group update PR and stop Dependabot creating any more for the specific dependency (unless you unignore this specific dependency or upgrade to it yourself) - `@dependabot unignore <dependency name>` will remove all of the ignore conditions of the specified dependency - `@dependabot unignore <dependency name> <ignore condition>` will remove the ignore condition of the specified dependency and ignore conditions
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/scorecard.yml | 2 +- .github/workflows/sycl-nightly.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 9f8ea3499f696..896a2ea8c183a 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -57,6 +57,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f079b8493333aace61c81488f8bd40919487bd9f # v3.25.7 + uses: github/codeql-action/upload-sarif@b611370bb5703a7efb587f9d136a52ea24c5c38c # v3.25.11 with: sarif_file: results.sarif diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index fc0b90be7990a..32a7814fa1c5c 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -141,7 +141,7 @@ jobs: echo "TAG=$(date +'%Y-%m-%d')-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" fi - name: Upload binaries - uses: softprops/action-gh-release@69320dbe05506a9a39fc8ae11030b214ec2d1f87 + uses: softprops/action-gh-release@a74c6b72af54cfa997e81df42d94703d6313a2d0 with: files: | sycl_linux.tar.gz From 9ed54582e3fcd69b941dc039adc8eff4ef521215 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Mon, 1 Jul 2024 10:45:20 -0400 Subject: [PATCH 33/40] [SYCL][E2E] Disable flaky test host_task_last.cpp on Gen12 Linux (#14352) https://github.com/intel/llvm/issues/14350 --------- Signed-off-by: Sarnie, Nick --- sycl/test-e2e/Graph/Explicit/host_task_last.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/Graph/Explicit/host_task_last.cpp b/sycl/test-e2e/Graph/Explicit/host_task_last.cpp index 34df0750b5366..5371ea1df3708 100644 --- a/sycl/test-e2e/Graph/Explicit/host_task_last.cpp +++ b/sycl/test-e2e/Graph/Explicit/host_task_last.cpp @@ -2,8 +2,10 @@ // RUN: 
%{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Disabled due to https://github.com/intel/llvm/issues/14350 // Extra run to check for immediate-command-list in Level Zero -// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// xRUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // REQUIRES: aspect-usm_shared_allocations From d52d7798e123874f697e9f70870f6999488bc144 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 1 Jul 2024 08:23:17 -0700 Subject: [PATCH 34/40] [SYCL] Don't throw in `device_impl::has` (#14355) 1) It isn't right 2) We need this change to get rid of deprecated `sycl::exception::get_cl_code` --- sycl/source/detail/device_impl.cpp | 4 ++-- sycl/source/device.cpp | 13 +++---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index ef02558bba55a..cd29ad1eec64d 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -747,8 +747,8 @@ bool device_impl::has(aspect Aspect) const { return call_successful && support; } } - throw runtime_error("This device aspect has not been implemented yet.", - PI_ERROR_INVALID_DEVICE); + + return false; // This device aspect has not been implemented yet. 
} std::shared_ptr device_impl::getHostDeviceImpl() { diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 70aa37aad26a2..20df5cf47256a 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -156,16 +156,9 @@ device::get_info_impl() const { #undef __SYCL_ASPECT }; - auto UnsupportedAspects = std::remove_if( - DeviceAspects.begin(), DeviceAspects.end(), [&](aspect Aspect) { - try { - return !impl->has(Aspect); - } catch (const runtime_error &ex) { - if (ex.get_cl_code() == PI_ERROR_INVALID_DEVICE) - return true; - throw; - } - }); + auto UnsupportedAspects = + std::remove_if(DeviceAspects.begin(), DeviceAspects.end(), + [&](aspect Aspect) { return !impl->has(Aspect); }); DeviceAspects.erase(UnsupportedAspects, DeviceAspects.end()); From 088bea6cd85135a7b007fde4e8c1e1c92ec25cac Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 1 Jul 2024 17:31:45 +0200 Subject: [PATCH 35/40] [SYCL][Docs] Add sycl_ext_oneapi_virtual_mem extension and implementation (#8954) This commit adds the sycl_ext_oneapi_virtual_mem experimental extension for reserving and mapping virtual address ranges. Accompanying it is the implementation in the SYCL runtime, together with CUDA and Level Zero backend support for the corresponding features. 
--------- Signed-off-by: Larsen, Steffen --- .../llvm/SYCLLowerIR/DeviceConfigFile.td | 3 +- .../sycl_ext_oneapi_virtual_mem.asciidoc | 398 ++++++++++++++++++ sycl/include/sycl/detail/pi.def | 12 + sycl/include/sycl/detail/pi.h | 145 ++++++- sycl/include/sycl/detail/pi.hpp | 2 + sycl/include/sycl/device_aspect_macros.hpp | 10 + .../ext/oneapi/virtual_mem/physical_mem.hpp | 81 ++++ .../ext/oneapi/virtual_mem/virtual_mem.hpp | 61 +++ sycl/include/sycl/info/aspects.def | 1 + sycl/include/sycl/sycl.hpp | 2 + sycl/plugins/cuda/pi_cuda.cpp | 63 +++ sycl/plugins/cuda/pi_cuda.hpp | 5 + sycl/plugins/hip/pi_hip.cpp | 63 +++ sycl/plugins/hip/pi_hip.hpp | 5 + sycl/plugins/level_zero/pi_level_zero.cpp | 138 ++++++ sycl/plugins/native_cpu/pi_native_cpu.cpp | 63 +++ sycl/plugins/native_cpu/pi_native_cpu.hpp | 5 + sycl/plugins/opencl/pi_opencl.cpp | 63 +++ sycl/plugins/unified_runtime/pi2ur.hpp | 217 ++++++++++ .../unified_runtime/pi_unified_runtime.cpp | 66 +++ sycl/source/CMakeLists.txt | 2 + sycl/source/detail/device_impl.cpp | 8 + sycl/source/detail/physical_mem_impl.hpp | 95 +++++ sycl/source/feature_test.hpp.in | 1 + sycl/source/physical_mem.cpp | 38 ++ sycl/source/virtual_mem.cpp | 183 ++++++++ .../VirtualMem/vector_with_virtual_mem.cpp | 236 +++++++++++ sycl/test/abi/pi_cuda_symbol_check.dump | 10 + sycl/test/abi/pi_hip_symbol_check.dump | 10 + sycl/test/abi/pi_level_zero_symbol_check.dump | 10 + sycl/test/abi/pi_nativecpu_symbol_check.dump | 10 + sycl/test/abi/pi_opencl_symbol_check.dump | 10 + sycl/test/abi/sycl_symbols_linux.dump | 13 + sycl/test/abi/sycl_symbols_windows.dump | 27 +- sycl/unittests/helpers/PiMockPlugin.hpp | 55 +++ 35 files changed, 2108 insertions(+), 3 deletions(-) create mode 100644 sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc create mode 100644 sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp create mode 100644 sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp create mode 100644 
sycl/source/detail/physical_mem_impl.hpp create mode 100644 sycl/source/physical_mem.cpp create mode 100644 sycl/source/virtual_mem.cpp create mode 100644 sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 38d5f2512a1c4..54357d1377c77 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -82,6 +82,7 @@ def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">; def AspectExt_oneapi_limited_graph : Aspect<"ext_oneapi_limited_graph">; def AspectExt_oneapi_private_alloca : Aspect<"ext_oneapi_private_alloca">; def AspectExt_oneapi_queue_profiling_tag : Aspect<"ext_oneapi_queue_profiling_tag">; +def AspectExt_oneapi_virtual_mem : Aspect<"ext_oneapi_virtual_mem">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">; @@ -139,7 +140,7 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component, AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph, - AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag], + AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. 
diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc new file mode 100644 index 0000000000000..72a6e1ed14f55 --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc @@ -0,0 +1,398 @@ += sycl_ext_oneapi_virtual_mem + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2023 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 8 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* + + +== Backend support status + +The APIs in this extension may be used only on a device that has +`aspect::ext_oneapi_virtual_mem`. 
The application must check that the devices +in the corresponding context have this aspect before using any of the APIs +introduced in this extension. If the application fails to do this, the +implementation throws a synchronous exception with the +`errc::feature_not_supported` error code. + +== Overview + +This extension adds the notion of "virtual memory ranges" to SYCL, introducing +a way to map an address range onto multiple allocations of physical memory, +allowing users to avoid expensive reallocations and potentially running out of +device memory while relocating the corresponding memory. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_VIRTUAL_MEM` to one of the values defined in the table +below. Applications can test for the existence of this macro to determine if +the implementation supports this feature, or applications can test the macro's +value to determine which of the extension's features the implementation +supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. +|=== + + +=== Device aspect + +Support for the features introduced in this extension can be queried using the +new `aspect::ext_oneapi_virtual_mem` defined as: + +```c++ +namespace sycl { + +enum class aspect : /* unspecified */ { + ... + ext_oneapi_virtual_mem +} + +} // namespace sycl +``` + + +=== Memory granularity + +Working with virtual address ranges and the underlying physical memory requires +the user to align and adjust in accordance with a specified minimum granularity. 
+ +The interfaces make the distinction between device granularity, which is the +granularity required for physical memory allocations, and context granularity, +which is the granularity required for virtual memory range reservations. + +The queries provide both a minimum and a recommended granularity. The minimum +device granularity is the smallest granularity that is supported for physical +memory allocations, and the minimum context granularity is the smallest +granularity that is supported for virtual memory range reservations. However, +the recommended granularity may be larger than these minimums and may provide +better performance. + +The interfaces for querying these granularities are defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +enum class granularity_mode : /*unspecified*/ { + minimum, + recommended +}; + +size_t get_mem_granularity(const device &syclDevice, const context &syclContext, + granularity_mode mode = granularity_mode::recommended); + +size_t get_mem_granularity(const context &syclContext, + granularity_mode mode = granularity_mode::recommended); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`size_t get_mem_granularity(const device &syclDevice, const context &syclContext, granularity_mode mode = granularity_mode::recommended)` | +Returns the granularity of physical memory allocations on `syclDevice` in the +`syclContext`. The `mode` argument specifies whether the query is for the +minimum or recommended granularity. + +If `syclDevice` does not have `aspect::ext_oneapi_virtual_mem` the call throws +an exception with `errc::feature_not_supported`. + +|`size_t get_mem_granularity(const context &syclContext, granularity_mode mode = granularity_mode::recommended)` | +Returns the granularity of virtual memory range reservations in the +`syclContext`.
The `mode` argument specifies whether the query is for the +minimum or recommended granularity. + +If any device in `syclContext` does not have `aspect::ext_oneapi_virtual_mem` +the call throws an exception with `errc::feature_not_supported`. + +|===================== + +=== Reserving virtual address ranges + +Virtual address ranges are represented by a `uintptr_t` and a number of bytes +reserved for it. The `uintptr_t` must be aligned in accordance with the minimum +granularity of the corresponding `context`, as queried through +`get_mem_granularity`, and likewise the number of bytes must be a multiple of +this granularity. It is the responsibility of the user to manage the +constituents of any virtual address range they reserve. + +The interfaces for reserving, freeing, and manipulating the access mode of a +virtual address range are defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +uintptr_t reserve_virtual_mem(uintptr_t start, size_t numBytes, const context &syclContext); +uintptr_t reserve_virtual_mem(size_t numBytes, const context &syclContext); + +void free_virtual_mem(uintptr_t ptr, size_t numBytes, const context &syclContext); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`uintptr_t reserve_virtual_mem(uintptr_t start, size_t numBytes, const context &syclContext)` | +Reserves a virtual memory range in `syclContext` with `numBytes` bytes. + +`start` specifies the requested start of the new virtual memory range +reservation. If the implementation is unable to reserve the virtual memory range +at the specified address, the implementation will pick another suitable address. + +`start` must be aligned in accordance with the minimum granularity for +`syclContext`, as returned by a call to `get_mem_granularity`. Likewise, +`numBytes` must be a multiple of the minimum granularity. 
Attempting to call +this function without meeting these requirements results in undefined behavior. + +If any of the devices in `syclContext` do not have +`aspect::ext_oneapi_virtual_mem` the call throws an exception with +`errc::feature_not_supported`. + +|`uintptr_t reserve_virtual_mem(size_t numBytes, const context &syclContext)` | +Same as `reserve_virtual_mem(0, numBytes, syclContext)`. + +|`void free_virtual_mem(uintptr_t ptr, size_t numBytes, const context &syclContext)` | +Frees a virtual memory range specified by `ptr` and `numBytes`. `ptr` must be +the same as returned by a call to `reserve_virtual_mem` and `numBytes` must be +the same as the size of the range specified in the reservation call. + +The virtual memory range must not currently be mapped to physical memory. A call +to this function with a mapped virtual memory range results in undefined +behavior. + +|===================== + + +=== Physical memory representation + +:crs: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:reference-semantics + +To represent the underlying physical device memory a virtual address is mapped +to, the `physical_mem` class is added. This new class is defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +enum class address_access_mode : /*unspecified*/ { + none, + read, + read_write +}; + +class physical_mem { +public: + physical_mem(const device &syclDevice, const context &syclContext, size_t numBytes); + physical_mem(const queue &syclQueue, size_t numBytes); + + /* -- common interface members -- */ + + void *map(uintptr_t ptr, size_t numBytes, address_access_mode mode, size_t offset = 0) const; + + context get_context() const; + device get_device() const; + + size_t size() const noexcept; +}; + +} // namespace sycl::ext::oneapi::experimental +``` + +`physical_mem` has common reference semantics, as described in +{crs}[section 4.5.2. Common reference semantics]. 
+ +[frame="topbot",options="header,footer"] +|============================ +|Member function |Description + +|`physical_mem(const device &syclDevice, const context &syclContext, size_t numBytes)` | +Constructs a `physical_mem` instance using the `syclDevice` provided. This +device must either be contained by `syclContext` or it must be a descendent +device of some device that is contained by that context, otherwise this function +throws a synchronous exception with the `errc::invalid` error code. + +This will allocate `numBytes` of physical memory on the device. `numBytes` must +be a multiple of the granularity for `syclDevice`, as returned by a call to +`get_mem_granularity`. + +If `syclDevice` does not have `aspect::ext_oneapi_virtual_mem` the call throws +an exception with `errc::feature_not_supported`. + +If the constructor is unable to allocate the required memory on `syclDevice`, +the call throws an exception with `errc::memory_allocation`. + +|`physical_mem(const queue &syclQueue, size_t numBytes)` | +Same as `physical_mem(syclQueue.get_device(), syclQueue.get_context(), numBytes)`. + +|`void *map(uintptr_t ptr, size_t numBytes, address_access_mode mode, size_t offset = 0) const` | +Maps a virtual memory range, specified by `ptr` and `numBytes`, to the physical +memory corresponding to this instance of `physical_mem`, starting at an offset +of `offset` bytes. + +It is required that `offset + numBytes` is less than or equal to `size()` and +that `ptr`, `numBytes` and `offset` are all multiples of the minimum granularity +for the device associated with this instance of `physical_mem`. + +If `mode` is `address_access_mode::read` or `address_access_mode::read_write` +the returned pointer is accessible after the call as read-only or read-write +respectively. Otherwise, it is considered inaccessible and accessing it will +result in undefined behavior. + +The returned pointer is equivalent to `reinterpret_cast<void *>(ptr)`.
+ +Writing to any address in the virtual memory range with access mode set to +`address_access_mode::read` results in undefined behavior. + +An accessible pointer behaves the same as a pointer to device USM memory and can +be used in place of a device USM pointer in any interface accepting one. + +A virtual memory range cannot be simultaneously mapped to more than one +physical memory region. Likewise, multiple virtual memory ranges cannot be +mapped onto the same physical memory region. Attempting to violate either of +these restrictions will result in undefined behavior. + +|`context get_context() const` | +Returns the SYCL context associated with the instance of `physical_mem`. + +|`device get_device() const` | +Returns the SYCL device associated with the instance of `physical_mem`. + +|`size_t size() const noexcept` | +Returns the size of the corresponding physical memory in bytes. + +|============================ + +Virtual memory address ranges are mapped to a `physical_mem` through the +`map` member function, where the access mode can also be specified. +To further get or set the access mode of a mapped virtual address range, the +user does not need to know the associated `physical_mem` and can just call the +following free functions. + +```c++ +namespace sycl::ext::oneapi::experimental { + +void set_access_mode(const void *ptr, size_t numBytes, address_access_mode mode, const context &syclContext); + +address_access_mode get_access_mode(const void *ptr, size_t numBytes, const context &syclContext); + +void unmap(const void *ptr, size_t numBytes, const context &syclContext); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`void set_access_mode(const void *ptr, size_t numBytes, address_access_mode mode, const context &syclContext)` | +Changes the access mode of a mapped virtual memory range specified by `ptr` and +`numBytes`.
+ +If `mode` is `address_access_mode::read` or `address_access_mode::read_write` +the `ptr` pointer is accessible after the call as read-only or read-write +respectively. Otherwise, it is considered inaccessible and accessing it will +result in undefined behavior. + +The virtual memory range specified by `ptr` and `numBytes` must be a sub-range +of virtual memory ranges previously mapped to `physical_mem`. `ptr` +must be aligned to the minimum memory granularity of the device associated with +the `physical_mem` the range is mapped to and `numBytes` must be a multiple of +the minimum memory granularity of the device associated with the `physical_mem` +the range is mapped to. + +Writing to any address in the virtual memory range with access mode set to +`address_access_mode::read` results in undefined behavior. + +An accessible pointer behaves the same as a pointer to device USM memory and can +be used in place of a device USM pointer in any interface accepting one. + +|`address_access_mode get_access_mode(const void *ptr, size_t numBytes, const context &syclContext)` | +Returns the access mode of the mapped virtual memory range specified by `ptr` +and `numBytes`. + +The virtual memory range specified by `ptr` and `numBytes` must be a sub-range +of virtual memory ranges previously mapped to `physical_mem`. `ptr` +must be aligned to the minimum memory granularity of the device associated with +the `physical_mem` the range is mapped to and `numBytes` must be a multiple of +the minimum memory granularity of the device associated with the `physical_mem` +the range is mapped to. + +|`void unmap(const void *ptr, size_t numBytes, const context &syclContext)` | +Unmaps the range specified by `ptr` and `numBytes`. The range must have been +mapped through a call to `physical_mem::map()` prior to calling this. The range +must not be a proper sub-range of a previously mapped range.
`syclContext` must +be the same as the context returned by the `get_context()` member function on +the `physical_mem` the address range is currently mapped to. + +After this call, the full range will again be ready to be mapped through a call +to `physical_mem::map()`. + +[_Note:_ Unmapping ranges that span multiple contiguous mapped ranges is not +supported. Doing so will result in undefined behavior. This restriction may be +lifted in the future. _{endnote}_] + +[_Note:_ The destructor for `physical_mem` will not unmap ranges mapped to it. +As such, the user must call `unmap` on ranges mapped to `physical_mem` objects +prior to their destruction. _{endnote}_] + +|===================== \ No newline at end of file diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 995579d612afb..3090b2d488ee0 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -215,4 +215,16 @@ _PI_API(piextDestroyExternalSemaphore) _PI_API(piextWaitExternalSemaphore) _PI_API(piextSignalExternalSemaphore) +// Virtual memory +_PI_API(piextVirtualMemGranularityGetInfo) +_PI_API(piextPhysicalMemCreate) +_PI_API(piextPhysicalMemRetain) +_PI_API(piextPhysicalMemRelease) +_PI_API(piextVirtualMemReserve) +_PI_API(piextVirtualMemFree) +_PI_API(piextVirtualMemMap) +_PI_API(piextVirtualMemUnmap) +_PI_API(piextVirtualMemSetAccess) +_PI_API(piextVirtualMemGetInfo) + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 79d67791ffc8d..ce7d34ef75899 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -191,9 +191,13 @@ // `win32_nt_dx12_resource` value. // the `pi_external_semaphore_handle_type` enum now has a new // `win32_nt_dx12_fence` value. 
+// 15.54 Added piextVirtualMem* functions, and piextPhysicalMem* functions, +// PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM device info descriptor, +// _pi_virtual_mem_granularity_info enum, _pi_virtual_mem_info enum and +// pi_virtual_access_flags bit flags. #define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 53 +#define _PI_H_VERSION_MINOR 54 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -505,6 +509,9 @@ typedef enum { // Timestamp enqueue PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x2011D, + + // Virtual memory support + PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM = 0x2011E, } _pi_device_info; typedef enum { @@ -756,6 +763,15 @@ typedef enum { PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS = 0x1143, } _pi_sampler_cubemap_filter_mode; +typedef enum { + PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM = 0x30100, + PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED = 0x30101, +} _pi_virtual_mem_granularity_info; + +typedef enum { + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE = 0x30200, +} _pi_virtual_mem_info; + using pi_context_properties = intptr_t; using pi_device_exec_capabilities = pi_bitfield; @@ -848,6 +864,10 @@ constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE = (1 << constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE = (1 << 8); // clang-format on +using pi_virtual_access_flags = pi_bitfield; +constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_RW = (1 << 0); +constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_READ_ONLY = (1 << 1); + typedef enum { // No preference for SLM or data cache. 
PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT = 0x0, @@ -889,6 +909,8 @@ using pi_program_binary_type = _pi_program_binary_type; using pi_kernel_info = _pi_kernel_info; using pi_profiling_info = _pi_profiling_info; using pi_kernel_cache_config = _pi_kernel_cache_config; +using pi_virtual_mem_granularity_info = _pi_virtual_mem_granularity_info; +using pi_virtual_mem_info = _pi_virtual_mem_info; using pi_image_copy_flags = _pi_image_copy_flags; @@ -1241,6 +1263,7 @@ struct _pi_program; struct _pi_kernel; struct _pi_event; struct _pi_sampler; +struct _pi_physical_mem; using pi_platform = _pi_platform *; using pi_device = _pi_device *; @@ -1255,6 +1278,7 @@ using pi_image_handle = pi_uint64; using pi_image_mem_handle = void *; using pi_interop_mem_handle = pi_uint64; using pi_interop_semaphore_handle = pi_uint64; +using pi_physical_mem = _pi_physical_mem *; typedef struct { pi_image_channel_order image_channel_order; @@ -2338,6 +2362,125 @@ pi_result piextEnqueueDeviceGlobalVariableRead( size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); +/// +/// Virtual memory +/// + +/// API for getting information about the minimum and recommended granularity +/// of physical and virtual memory. +/// +/// \param context is the context to get the granularity from. +/// \param device is the device to get the granularity from. +/// \param param_name is the type of query to perform. +/// \param param_value_size is the size of the result in bytes. +/// \param param_value is the result. +/// \param param_value_size_ret is how many bytes were written. +__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( + pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + +/// API for creating a physical memory handle that virtual memory can be mapped +/// to. 
+/// +/// \param context is the context within which the physical memory is allocated. +/// \param device is the device the physical memory is on. +/// \param mem_size is the size of physical memory to allocate. This must be a +/// multiple of the minimum virtual memory granularity. +/// \param ret_physical_mem is the handle for the resulting physical memory. +__SYCL_EXPORT pi_result +piextPhysicalMemCreate(pi_context context, pi_device device, size_t mem_size, + pi_physical_mem *ret_physical_mem); + +/// API for retaining a physical memory handle. +/// +/// \param physical_mem is the handle for the physical memory to retain. +__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem); + +/// API for releasing a physical memory handle. +/// +/// \param physical_mem is the handle for the physical memory to free. +__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem); + +/// API for reserving a virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// reserved. +/// \param start is a pointer to the start of the region to reserve. If nullptr +/// the implementation selects a start address. +/// \param range_size is the size of the virtual address range to reserve in +/// bytes. +/// \param ret_ptr is the pointer to the start of the resulting virtual memory +/// range. +__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context context, + const void *start, + size_t range_size, + void **ret_ptr); + +/// API for freeing a virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// reserved. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range. +__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size); + +/// API for mapping a virtual memory range to a a physical memory allocation at +/// a given offset. 
+/// +/// \param context is the context within which both the virtual memory range is +/// reserved and the physical memory is allocated. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range. +/// \param physical_mem is the handle for the physical memory to map ptr to. +/// \param offset is the offset into physical_mem in bytes to map ptr to. +/// \param flags is the access flags to set for the mapping. +__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, + pi_physical_mem physical_mem, + size_t offset, + pi_virtual_access_flags flags); + +/// API for unmapping a virtual memory range previously mapped in a context. +/// After a call to this function, the virtual memory range is left in a state +/// ready to be remapped. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context context, + const void *ptr, + size_t range_size); + +/// API for setting the access mode of a mapped virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +/// \param flags is the access flags to set for the mapped virtual access range. +__SYCL_EXPORT pi_result piextVirtualMemSetAccess(pi_context context, + const void *ptr, + size_t range_size, + pi_virtual_access_flags flags); + +/// API for getting info about a mapped virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. 
+/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +/// \param param_name is the type of query to perform. +/// \param param_value_size is the size of the result in bytes. +/// \param param_value is the result. +/// \param param_value_size_ret is how many bytes were written. +__SYCL_EXPORT pi_result +piextVirtualMemGetInfo(pi_context context, const void *ptr, size_t range_size, + pi_virtual_mem_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + /// /// Plugin /// diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index 3500c576bb599..1fe21d36a8aaa 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -146,6 +146,8 @@ using PiExternalMemDescriptor = ::pi_external_mem_descriptor; using PiExternalSemaphoreDescriptor = ::pi_external_semaphore_descriptor; using PiImageOffset = ::pi_image_offset_struct; using PiImageRegion = ::pi_image_region_struct; +using PiPhysicalMem = ::pi_physical_mem; +using PiVirtualAccessFlags = ::pi_virtual_access_flags; __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, pi_context_extended_deleter func, diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index df6c827de60f2..d756b0a62e88a 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -381,6 +381,11 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_virtual_mem__ +// __SYCL_ASPECT(ext_oneapi_virtual_mem, 74) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_virtual_mem__ 0 +#endif + #ifndef __SYCL_ANY_DEVICE_HAS_host__ // __SYCL_ASPECT(host, 0) #define __SYCL_ANY_DEVICE_HAS_host__ 0 @@ -750,3 +755,8 @@ // __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) #define 
__SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__ 0 #endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_virtual_mem__ +// __SYCL_ASPECT(ext_oneapi_virtual_mem, 74) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_virtual_mem__ 0 +#endif diff --git a/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp b/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp new file mode 100644 index 0000000000000..24d371fe8c6fd --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp @@ -0,0 +1,81 @@ +//==--- physical_mem.hpp - sycl_ext_oneapi_virtual_mem physical_mem class --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { + +namespace detail { +class physical_mem_impl; +} // namespace detail + +namespace ext::oneapi::experimental { + +enum class address_access_mode : char { none = 0, read = 1, read_write = 2 }; + +class __SYCL_EXPORT physical_mem + : public sycl::detail::OwnerLessBase { +public: + physical_mem(const device &SyclDevice, const context &SyclContext, + size_t NumBytes); + + physical_mem(const queue &SyclQueue, size_t NumBytes) + : physical_mem(SyclQueue.get_device(), SyclQueue.get_context(), + NumBytes) {} + + physical_mem(const physical_mem &rhs) = default; + physical_mem(physical_mem &&rhs) = default; + + physical_mem &operator=(const physical_mem &rhs) = default; + physical_mem &operator=(physical_mem &&rhs) = default; + + ~physical_mem() noexcept(false) {}; + + bool operator==(const physical_mem &rhs) const { return impl == rhs.impl; } + bool operator!=(const physical_mem &rhs) const { return !(*this == rhs); } + + void *map(uintptr_t Ptr, size_t NumBytes, 
address_access_mode Mode, + size_t Offset = 0) const; + + context get_context() const; + device get_device() const; + + size_t size() const noexcept; + +private: + std::shared_ptr impl; + + template + friend decltype(Obj::impl) + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + template + friend T sycl::detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); +}; + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl + +namespace std { +template <> struct hash { + size_t operator()( + const sycl::ext::oneapi::experimental::physical_mem &PhysicalMem) const { + return hash>()( + sycl::detail::getSyclObjImpl(PhysicalMem)); + } +}; +} // namespace std diff --git a/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp b/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp new file mode 100644 index 0000000000000..74a42354eaa01 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp @@ -0,0 +1,61 @@ +//==- virtual_mem.hpp - sycl_ext_oneapi_virtual_mem virtual mem free funcs -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +enum class granularity_mode : char { + minimum = 0, + recommended = 1, +}; + +__SYCL_EXPORT size_t +get_mem_granularity(const device &SyclDevice, const context &SyclContext, + granularity_mode Mode = granularity_mode::recommended); + +__SYCL_EXPORT size_t +get_mem_granularity(const context &SyclContext, + granularity_mode Mode = granularity_mode::recommended); + +__SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, + const context &SyclContext); + +inline uintptr_t reserve_virtual_mem(size_t NumBytes, + const context &SyclContext) { + return reserve_virtual_mem(0, NumBytes, SyclContext); +} + +__SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, + const context &SyclContext); + +__SYCL_EXPORT void set_access_mode(const void *Ptr, size_t NumBytes, + address_access_mode Mode, + const context &SyclContext); + +__SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, + size_t NumBytes, + const context &SyclContext); + +__SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, + const context &SyclContext); + +} // Namespace ext::oneapi::experimental +} // namespace _V1 +} // Namespace sycl diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 2d9cee1351d7a..3b744a89dbb90 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -68,3 +68,4 @@ __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 70) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 71) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72) __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) +__SYCL_ASPECT(ext_oneapi_virtual_mem, 74) diff --git 
a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 53a60381f0b8d..16b5e8f0f6c40 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -111,4 +111,6 @@ #include #include #include +#include +#include #include diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 0077b245905db..1628b1537fae5 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1298,6 +1298,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result 
piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 2b5d77b26ea9d..8c5112f4cc9d1 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -81,4 +82,8 @@ struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; }; +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; + #endif // PI_CUDA_HPP diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 33b7388f9c884..c3324463690eb 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1301,6 +1301,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, 
+ param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/hip/pi_hip.hpp 
b/sycl/plugins/hip/pi_hip.hpp index 018d069f5fe7f..bec26c9866fdb 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -94,4 +95,8 @@ struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; }; +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; + #endif // PI_HIP_HPP diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index f88e8c1ed3cd3..bab365effe85f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1424,6 +1424,144 @@ piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { return pi2ur::piextCommandBufferReleaseCommand(Command); } +/// API for getting information about the minimum and recommended granularity +/// of physical and virtual memory. +/// +/// \param Context is the context to get the granularity from. +/// \param Device is the device to get the granularity from. +/// \param ParamName is the type of query to perform. +/// \param ParamValueSize is the size of the result in bytes. +/// \param ParamValue is the result. +/// \param ParamValueSizeRet is how many bytes were written. +/// \return the result of forwarding the query to pi2ur. +pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +/// API for creating a physical memory handle that virtual memory can be mapped +/// to. +/// +/// \param Context is the context within which the physical memory is allocated.
+/// \param Device is the device the physical memory is on. +/// \param MemSize is the size of physical memory to allocate. This must be a +/// multiple of the minimum virtual memory granularity. +/// \param RetPhysicalMem is the handle for the resulting physical memory. +pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhysicalMem); +} + +/// API for retaining a physical memory handle. +/// +/// \param PhysicalMem is the handle for the physical memory to retain. +pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +/// API for releasing a physical memory handle. +/// +/// \param PhysicalMem is the handle for the physical memory to free. +pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +/// API for reserving a virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// reserved. +/// \param Start is a pointer to the start of the region to reserve. If nullptr +/// the implementation selects a start address. +/// \param RangeSize is the size of the virtual address range to reserve in +/// bytes. +/// \param RetPtr is the pointer to the start of the resulting virtual memory +/// range. +pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +/// API for freeing a virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// reserved. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range. 
+pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +/// API for mapping a virtual memory range to a physical memory allocation at +/// a given offset. +/// +/// \param Context is the context within which both the virtual memory range is +/// reserved and the physical memory is allocated. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +/// \param PhysicalMem is the handle for the physical memory to map Ptr to. +/// \param Offset is the offset into PhysicalMem in bytes to map Ptr to. +/// \param Flags is the access flags to set for the mapping. +pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, pi_physical_mem PhysicalMem, + size_t Offset, pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +/// API for unmapping a virtual memory range previously mapped in a context. +/// After a call to this function, the virtual memory range is left in a state +/// ready to be remapped. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +/// API for setting the access mode of a mapped virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes.
+/// \param Flags is the access flags to set for the mapped virtual access range. +pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +/// API for getting info about a mapped virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +/// \param ParamName is the type of query to perform. +/// \param ParamValueSize is the size of the result in bytes. +/// \param ParamValue is the result. +/// \param ParamValueSizeRet is how many bytes were written. +pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + const char SupportedVersion[] = _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { // missing diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index d867caea5e23d..2276e9f78f7ea 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -1321,6 +1321,69 @@ pi_result piextKernelSuggestMaxCooperativeGroupCount( return PI_ERROR_UNSUPPORTED_FEATURE; } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result 
piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + // Initialize function table with stubs. 
#define _PI_API(api) \ (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); diff --git a/sycl/plugins/native_cpu/pi_native_cpu.hpp b/sycl/plugins/native_cpu/pi_native_cpu.hpp index 1d92580997b76..287b3c03115b6 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.hpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -43,3 +44,7 @@ struct _pi_program : ur_program_handle_t_ { struct _pi_queue : ur_queue_handle_t_ { using ur_queue_handle_t_::ur_queue_handle_t_; }; + +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1fef329d179af..1d340b5685f4e 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1228,6 +1228,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhysicalMem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +pi_result 
piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, pi_physical_mem PhysicalMem, + size_t Offset, pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + pi_result piTearDown(void *PluginParameter) { return pi2ur::piTearDown(PluginParameter); } diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 3ee63a025593b..f22e672d84423 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -678,6 +678,31 @@ inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, } } +inline pi_result ur2piVirtualMemInfoValue(ur_virtual_mem_info_t ParamName, + size_t ParamValueSizePI, + size_t *ParamValueSizeUR, + void *ParamValue) { + + ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); + switch (ParamName) { + case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { + auto ConvertFunc = [](ur_virtual_mem_access_flags_t UrValue) { + pi_virtual_access_flags PiValue = 0; + if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) + PiValue |= 
PI_VIRTUAL_ACCESS_FLAG_RW; + if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) + PiValue |= PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + return PiValue; + }; + return Value + .convert( + ConvertFunc); + } + default: + return PI_SUCCESS; + } +} + // Translate UR device info values to PI info values inline pi_result ur2piUSMAllocInfoValue(ur_usm_alloc_info_t ParamName, size_t ParamValueSizePI, @@ -1311,6 +1336,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, + UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT) #undef PI_TO_UR_MAP_DEVICE_INFO default: return PI_ERROR_UNKNOWN; @@ -5665,4 +5692,194 @@ inline pi_result piextSignalExternalSemaphore( // Bindless Images Extension /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// Virtual Memory + +inline pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + + ur_virtual_mem_granularity_info_t InfoType{}; + switch (ParamName) { + case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: + InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + break; + case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: + InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + break; + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urVirtualMemGranularityGetInfo(UrContext, UrDevice, InfoType, + ParamValueSize, ParamValue, + 
ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast<ur_context_handle_t>(Context); + ur_device_handle_t UrDevice = reinterpret_cast<ur_device_handle_t>(Device); + + ur_physical_mem_handle_t *UrPhysicalMem = + reinterpret_cast<ur_physical_mem_handle_t *>(RetPhysicalMem); + + HANDLE_ERRORS(urPhysicalMemCreate(UrContext, UrDevice, MemSize, nullptr, + UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast<ur_physical_mem_handle_t>(PhysicalMem); + + HANDLE_ERRORS(urPhysicalMemRetain(UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast<ur_physical_mem_handle_t>(PhysicalMem); + + HANDLE_ERRORS(urPhysicalMemRelease(UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(RetPtr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast<ur_context_handle_t>(Context); + + HANDLE_ERRORS(urVirtualMemReserve(UrContext, Start, RangeSize, RetPtr)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast<ur_context_handle_t>(Context); + + HANDLE_ERRORS(urVirtualMemFree(UrContext, Ptr, RangeSize)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { +
PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_virtual_mem_access_flags_t UrFlags = 0; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + + HANDLE_ERRORS(urVirtualMemSetAccess(UrContext, Ptr, RangeSize, UrFlags)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_physical_mem PhysicalMem, size_t Offset, + pi_virtual_access_flags Flags) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + ur_virtual_mem_access_flags_t UrFlags = 0; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + + HANDLE_ERRORS(urVirtualMemMap(UrContext, Ptr, RangeSize, UrPhysicalMem, + Offset, UrFlags)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemUnmap(UrContext, Ptr, RangeSize)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + 
ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_virtual_mem_info_t InfoType{}; + switch (ParamName) { + case PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE: + InfoType = UR_VIRTUAL_MEM_INFO_ACCESS_MODE; + break; + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urVirtualMemGetInfo(UrContext, Ptr, RangeSize, InfoType, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + ur2piVirtualMemInfoValue(InfoType, ParamValueSize, &ParamValueSize, + ParamValue); + + return PI_SUCCESS; +} + +// Virtual Memory +/////////////////////////////////////////////////////////////////////////////// + } // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 30ba9a7afc8b1..7e268199bba77 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -1189,6 +1189,72 @@ piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { return pi2ur::piextCommandBufferReleaseCommand(Command); } +__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( + pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +__SYCL_EXPORT pi_result +piextPhysicalMemCreate(pi_context Context, pi_device Device, size_t MemSize, + pi_physical_mem *RetPhsycialMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhsycialMem); +} + +__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context Context, + const 
void *Start, + size_t RangeSize, + void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +__SYCL_EXPORT pi_result +piextVirtualMemSetAccess(pi_context Context, const void *Ptr, size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_physical_mem PhysicalMem, + size_t Offset, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context Context, + const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +__SYCL_EXPORT pi_result +piextVirtualMemGetInfo(pi_context Context, const void *Ptr, size_t RangeSize, + pi_virtual_mem_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 74497db20c9f1..f915ef4e2cb8e 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -257,11 +257,13 @@ set(SYCL_COMMON_SOURCES "interop_handle.cpp" "kernel.cpp" "kernel_bundle.cpp" + "physical_mem.cpp" "platform.cpp" "queue.cpp" "sampler.cpp" "stream.cpp" "spirv_ops.cpp" + "virtual_mem.cpp" "$<$:detail/windows_pi.cpp>" "$<$,$>:detail/posix_pi.cpp>" ) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index cd29ad1eec64d..8547a40d4b999 
100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -746,6 +746,14 @@ bool device_impl::has(aspect Aspect) const { sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; return call_successful && support; } + case aspect::ext_oneapi_virtual_mem: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } } return false; // This device aspect has not been implemented yet. diff --git a/sycl/source/detail/physical_mem_impl.hpp b/sycl/source/detail/physical_mem_impl.hpp new file mode 100644 index 0000000000000..9fb38f1202257 --- /dev/null +++ b/sycl/source/detail/physical_mem_impl.hpp @@ -0,0 +1,95 @@ +//==- physical_mem_impl.hpp - sycl_ext_oneapi_virtual_mem physical_mem impl ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +inline sycl::detail::pi::PiVirtualAccessFlags AccessModeToVirtualAccessFlags( + ext::oneapi::experimental::address_access_mode Mode) { + switch (Mode) { + case ext::oneapi::experimental::address_access_mode::read: + return PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + case ext::oneapi::experimental::address_access_mode::read_write: + return PI_VIRTUAL_ACCESS_FLAG_RW; + case ext::oneapi::experimental::address_access_mode::none: + return 0; + default: + throw sycl::exception(make_error_code(errc::invalid), + "Invalid address_access_mode."); + } +} + +class physical_mem_impl { +public: + physical_mem_impl(const device &SyclDevice, const context &SyclContext, + size_t NumBytes) + : MDevice(getSyclObjImpl(SyclDevice)), + MContext(getSyclObjImpl(SyclContext)), MNumBytes(NumBytes) { + const PluginPtr &Plugin = MContext->getPlugin(); + + auto Err = Plugin->call_nocheck( + MContext->getHandleRef(), MDevice->getHandleRef(), MNumBytes, + &MPhysicalMem); + + if (Err == PI_ERROR_OUT_OF_RESOURCES || Err == PI_ERROR_OUT_OF_HOST_MEMORY) + throw sycl::exception(make_error_code(errc::memory_allocation), + "Failed to allocate physical memory."); + Plugin->checkPiResult(Err); + } + + ~physical_mem_impl() noexcept(false) { + const PluginPtr &Plugin = MContext->getPlugin(); + Plugin->call(MPhysicalMem); + } + + void *map(uintptr_t Ptr, size_t NumBytes, + ext::oneapi::experimental::address_access_mode Mode, + size_t Offset) const { + sycl::detail::pi::PiVirtualAccessFlags AccessFlags = + AccessModeToVirtualAccessFlags(Mode); + const PluginPtr &Plugin = MContext->getPlugin(); + void *ResultPtr = reinterpret_cast(Ptr); + Plugin->call( + MContext->getHandleRef(), ResultPtr, NumBytes, 
MPhysicalMem, Offset, + AccessFlags); + return ResultPtr; + } + + context get_context() const { + return createSyclObjFromImpl(MContext); + } + device get_device() const { return createSyclObjFromImpl(MDevice); } + size_t size() const noexcept { return MNumBytes; } + + sycl::detail::pi::PiPhysicalMem &getHandleRef() { return MPhysicalMem; } + const sycl::detail::pi::PiPhysicalMem &getHandleRef() const { + return MPhysicalMem; + } + +private: + sycl::detail::pi::PiPhysicalMem MPhysicalMem = nullptr; + const std::shared_ptr MDevice; + const std::shared_ptr MContext; + const size_t MNumBytes; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index ce88520fe50dd..f7e023c718462 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -86,6 +86,7 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_ANNOTATED_ARG 1 #define SYCL_EXT_ONEAPI_ANNOTATED_PTR 1 #define SYCL_EXT_ONEAPI_COPY_OPTIMIZE 1 +#define SYCL_EXT_ONEAPI_VIRTUAL_MEM 1 #define SYCL_EXT_ONEAPI_USM_MALLOC_PROPERTIES 1 #cmakedefine01 SYCL_ENABLE_KERNEL_FUSION #if SYCL_ENABLE_KERNEL_FUSION diff --git a/sycl/source/physical_mem.cpp b/sycl/source/physical_mem.cpp new file mode 100644 index 0000000000000..d9d6073a68e89 --- /dev/null +++ b/sycl/source/physical_mem.cpp @@ -0,0 +1,38 @@ +//==--- physical_mem.cpp - sycl_ext_oneapi_virtual_mem physical_mem class --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +physical_mem::physical_mem(const device &SyclDevice, const context &SyclContext, + size_t NumBytes) { + if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_virtual_mem."); + + impl = std::make_shared( + SyclDevice, SyclContext, NumBytes); +} + +void *physical_mem::map(uintptr_t Ptr, size_t NumBytes, + address_access_mode Mode, size_t Offset) const { + return impl->map(Ptr, NumBytes, Mode, Offset); +} + +context physical_mem::get_context() const { return impl->get_context(); } +device physical_mem::get_device() const { return impl->get_device(); } +size_t physical_mem::size() const noexcept { return impl->size(); } + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/virtual_mem.cpp b/sycl/source/virtual_mem.cpp new file mode 100644 index 0000000000000..8cdc5ffba0223 --- /dev/null +++ b/sycl/source/virtual_mem.cpp @@ -0,0 +1,183 @@ +//==- virtual_mem.cpp - sycl_ext_oneapi_virtual_mem virtual mem free funcs -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +// System headers for querying page-size. 
+#ifdef _WIN32 +#include +#else +#include +#endif + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +__SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, + const context &SyclContext, + granularity_mode Mode) { + if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_virtual_mem."); + + pi_virtual_mem_granularity_info GranularityQuery = [=]() { + switch (Mode) { + case granularity_mode::minimum: + return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + case granularity_mode::recommended: + return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + } + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "Unrecognized granularity mode."); + }(); + + std::shared_ptr DeviceImpl = + sycl::detail::getSyclObjImpl(SyclDevice); + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); +#ifndef NDEBUG + size_t InfoOutputSize; + Plugin->call( + ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, + 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(size_t) && + "Unexpected output size of granularity info query."); +#endif // NDEBUG + size_t Granularity = 0; + Plugin->call( + ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, + sizeof(size_t), &Granularity, nullptr); + return Granularity; +} + +__SYCL_EXPORT size_t get_mem_granularity(const context &SyclContext, + granularity_mode Mode) { + const std::vector Devices = SyclContext.get_devices(); + if (!std::all_of(Devices.cbegin(), Devices.cend(), [](const device &Dev) { + return Dev.has(aspect::ext_oneapi_virtual_mem); + })) { + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "One or more devices in the context does not support " + 
"aspect::ext_oneapi_virtual_mem."); + } + + // CUDA only needs page-size granularity. + if (SyclContext.get_backend() == backend::ext_oneapi_cuda) { +#ifdef _WIN32 + SYSTEM_INFO SystemInfo; + GetSystemInfo(&SystemInfo); + return static_cast(SystemInfo.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif + } + + // Otherwise, we find the least common multiple of granularity of the devices + // in the context. + size_t LCMGranularity = get_mem_granularity(Devices[0], SyclContext, Mode); + for (size_t I = 1; I < Devices.size(); ++I) { + size_t DevGranularity = get_mem_granularity(Devices[I], SyclContext, Mode); + size_t GCD = LCMGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + LCMGranularity *= DevGranularity / GCD; + } + return LCMGranularity; +} + +__SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, + const context &SyclContext) { + std::vector Devs = SyclContext.get_devices(); + if (std::any_of(Devs.cbegin(), Devs.cend(), [](const device &Dev) { + return !Dev.has(aspect::ext_oneapi_virtual_mem); + })) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "One or more devices in the supplied context does not support " + "aspect::ext_oneapi_virtual_mem."); + + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + void *OutPtr = nullptr; + Plugin->call( + ContextImpl->getHandleRef(), reinterpret_cast(Start), NumBytes, + &OutPtr); + return reinterpret_cast(OutPtr); +} + +__SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), reinterpret_cast(Ptr), NumBytes); +} + +__SYCL_EXPORT void set_access_mode(const 
void *Ptr, size_t NumBytes, + address_access_mode Mode, + const context &SyclContext) { + sycl::detail::pi::PiVirtualAccessFlags AccessFlags = + sycl::detail::AccessModeToVirtualAccessFlags(Mode); + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, AccessFlags); +} + +__SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, + size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); +#ifndef NDEBUG + size_t InfoOutputSize; + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(sycl::detail::pi::PiVirtualAccessFlags) && + "Unexpected output size of access mode info query."); +#endif // NDEBUG + sycl::detail::pi::PiVirtualAccessFlags AccessFlags; + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, + sizeof(sycl::detail::pi::PiVirtualAccessFlags), &AccessFlags, nullptr); + + if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_RW) + return address_access_mode::read_write; + if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + return address_access_mode::read; + return address_access_mode::none; +} + +__SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp b/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp new file mode
100644 index 0000000000000..cbbcf52e3ab25 --- /dev/null +++ b/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp @@ -0,0 +1,236 @@ +// REQUIRES: aspect-ext_oneapi_virtual_mem, usm_shared_allocations + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +#include +#include +#include + +namespace syclext = sycl::ext::oneapi::experimental; + +// Find the least common multiple of the context and device granularities. This +// value can be used for aligning both physical memory allocations and for +// reserving virtual memory ranges. +size_t GetLCMGranularity(const sycl::device &Dev, const sycl::context &Ctx) { + size_t CtxGranularity = syclext::get_mem_granularity(Ctx); + size_t DevGranularity = syclext::get_mem_granularity(Dev, Ctx); + + size_t GCD = CtxGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + return (DevGranularity / GCD) * CtxGranularity; +} + +template class VirtualVector { +public: + VirtualVector(sycl::queue &Q) + : MDevice{Q.get_device()}, MContext{Q.get_context()}, + MGranularity{GetLCMGranularity(MDevice, MContext)} {}; + + ~VirtualVector() { + // Free all mapped ranges. + unmap_all(); + for (const VirtualAddressRange &VARange : MVARanges) + syclext::free_virtual_mem(VARange.Ptr, VARange.Size, MContext); + // Physical memory allocations will be freed when the physical_mem objects + // die with MPhysicalMemMappings. + } + + void reserve(size_t NewSize) { + // If we already have more memory than required, we can return. + size_t NewByteSize = sizeof(T) * NewSize; + if (NewByteSize <= MByteSize) { + MSize = NewSize; + return; + } + + // Align the size by the granularity. + size_t AlignedNewByteSize = AlignByteSize(NewByteSize); + size_t AlignedNewVARangeSize = AlignedNewByteSize - MByteSize; + + // Try to reserve virtual memory at the end of the existing one.
+ uintptr_t CurrentEnd = reinterpret_cast(MBasePtr) + MByteSize; + uintptr_t NewVAPtr = syclext::reserve_virtual_mem( + CurrentEnd, AlignedNewVARangeSize, MContext); + + // If we failed to get a ptr to the end of the current range, we need to + // recreate the whole range. + if (CurrentEnd && NewVAPtr != CurrentEnd) { + // First we need to free the virtual address range we just reserved. + syclext::free_virtual_mem(NewVAPtr, AlignedNewVARangeSize, MContext); + + // Recreate the full range and update the new VA ptr. CurrentEnd is no + // longer valid after this call. + NewVAPtr = RecreateAddressRange(AlignedNewByteSize); + } else { + // Otherwise we need to register the new range. + MVARanges.emplace_back(NewVAPtr, AlignedNewVARangeSize); + + // If there was no base pointer previously, this is now the new base. + if (!MBasePtr) + MBasePtr = reinterpret_cast(NewVAPtr); + } + + // Create new physical memory allocation and map the new range to it. + syclext::physical_mem NewPhysicalMem{MDevice, MContext, + AlignedNewVARangeSize}; + void *MappedPtr = + NewPhysicalMem.map(NewVAPtr, AlignedNewVARangeSize, + syclext::address_access_mode::read_write); + MPhysicalMemMappings.push_back( + std::make_pair(std::move(NewPhysicalMem), MappedPtr)); + + // Update the byte size of the vector. + MSize = NewSize; + MByteSize = AlignedNewByteSize; + } + + size_t size() const noexcept { return MSize; } + T *data() const noexcept { return MBasePtr; } + +private: + size_t AlignByteSize(size_t UnalignedByteSize) const { + return ((UnalignedByteSize + MGranularity - 1) / MGranularity) * + MGranularity; + } + + void unmap_all() { + for (std::pair &Mapping : + MPhysicalMemMappings) { + if (Mapping.second == 0) + continue; + syclext::unmap(Mapping.second, Mapping.first.size(), MContext); + Mapping.second = 0; + } + } + + uintptr_t RecreateAddressRange(size_t AlignedNewByteSize) { + // Reserve the full range. 
+ uintptr_t NewFullVAPtr = + syclext::reserve_virtual_mem(AlignedNewByteSize, MContext); + + // Unmap the old virtual address ranges. + unmap_all(); + + // Remap all existing ranges. + uintptr_t NewEnd = NewFullVAPtr; + for (std::pair &Mapping : + MPhysicalMemMappings) { + Mapping.second = + Mapping.first.map(NewEnd, Mapping.first.size(), + syclext::address_access_mode::read_write); + NewEnd += Mapping.first.size(); + } + + // Free the old ranges. + for (const VirtualAddressRange &VARange : MVARanges) + syclext::free_virtual_mem(VARange.Ptr, VARange.Size, MContext); + + // Insert the newly reserved range to the saved ranges. + MVARanges.clear(); + MVARanges.emplace_back(NewFullVAPtr, AlignedNewByteSize); + + // Update the base pointer to point to the new start. + MBasePtr = reinterpret_cast(NewFullVAPtr); + + // Return the new end of the mapped ranges. + return NewEnd; + } + + struct VirtualAddressRange { + VirtualAddressRange(uintptr_t Ptr, size_t Size) : Ptr{Ptr}, Size{Size} {} + + uintptr_t Ptr; + size_t Size; + }; + + sycl::device MDevice; + sycl::context MContext; + + std::vector MVARanges; + std::vector> MPhysicalMemMappings; + + T *MBasePtr = nullptr; + size_t MSize = 0; + size_t MByteSize = 0; + + const size_t MGranularity = 0; +}; + +static constexpr size_t NumIters = 10; +static constexpr size_t WriteValueOffset = 42; +static constexpr size_t NumWorkItems = 512; + +int main() { + sycl::queue Q; + + VirtualVector Vec(Q); + + // To better test the functionality, try to allocate below the granularity + // but enough to require more memory for some iterations. + size_t SizeIncrement = 11; + size_t MinSizeGran = + syclext::get_mem_granularity(Q.get_device(), Q.get_context()) / + sizeof(int); + SizeIncrement = std::max(MinSizeGran / 2 - 1, SizeIncrement); + + // Each work-item will work on multiple elements. + size_t NumElemsPerWI = 1 + (SizeIncrement - 1) / NumWorkItems; + + for (size_t I = 0; I < NumIters; ++I) { + // Increment the size of the vector. 
+ size_t NewVecSize = (I + 1) * SizeIncrement; + Vec.reserve(NewVecSize); + assert(Vec.size() == NewVecSize); + + // Populate to the new memory + int *VecDataPtr = Vec.data(); + size_t StartOffset = I * SizeIncrement; + size_t IterWriteValueOffset = WriteValueOffset * (I + 1); + Q.parallel_for(sycl::range<1>{NumWorkItems}, [=](sycl::item<1> Idx) { + for (size_t J = 0; J < NumElemsPerWI; ++J) { + size_t LoopIdx = J * Idx.get_range(0) + Idx; + size_t OffsetIdx = StartOffset + LoopIdx; + if (OffsetIdx < NewVecSize) + VecDataPtr[OffsetIdx] = LoopIdx + IterWriteValueOffset; + } + }).wait_and_throw(); + + // Copy back the values and verify. + int *CopyBack = sycl::malloc_shared(NewVecSize, Q); + + // TODO: Level-zero (excluding on PVC) does not currently allow copy across + // virtual memory ranges, even if they are consecutive. + syclext::architecture DevArch = + Q.get_device().get_info(); + if (Q.get_backend() == sycl::backend::ext_oneapi_level_zero && + DevArch != syclext::architecture::intel_gpu_pvc && + DevArch != syclext::architecture::intel_gpu_pvc_vg) { + Q.parallel_for(sycl::range<1>{NewVecSize}, [=](sycl::id<1> Idx) { + CopyBack[Idx] = VecDataPtr[Idx]; + }).wait_and_throw(); + } else { + Q.copy(VecDataPtr, CopyBack, NewVecSize).wait_and_throw(); + } + + for (size_t J = 0; J < NewVecSize; ++J) { + int ExpectedVal = + J % SizeIncrement + WriteValueOffset * (J / SizeIncrement + 1); + if (CopyBack[J] != ExpectedVal) { + std::cout << "Comparison failed at index " << J << ": " << CopyBack[J] + << " != " << ExpectedVal << std::endl; + return 1; + } + } + sycl::free(CopyBack, Q); + } + + return 0; +} diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index d3047c6bb1cd0..e6b19e97d1b87 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo
+piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index c83b4a4ba6122..530ad95722494 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index d6cc82870c669..93cd4c4de10bb 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -145,6 +145,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -170,4 +173,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo 
+piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index 850e6d22fdb72..c63f579ca6b53 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index daaf7bbee5de5..8807d1647ebdc 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -133,6 +133,9 @@ piextMemGetNativeHandle piextMemImageAllocate piextMemImageCopy piextMemImageCreateWithNativeHandle +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextMemImageFree piextMemImageGetInfo piextMemImportOpaqueFD @@ -170,4 +173,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2c97a01f87da7..99fb95d92fa72 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ 
b/sycl/test/abi/sycl_symbols_linux.dump @@ -2990,6 +2990,15 @@ _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext6oneapi12experimental10mem_adviseENS0_5queueEPvmiRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi10level_zero6detail11make_deviceERKNS0_8platformEm +_ZN4sycl3_V13ext6oneapi12experimental12physical_memC1ERKNS0_6deviceERKNS0_7contextEm +_ZN4sycl3_V13ext6oneapi12experimental12physical_memC2ERKNS0_6deviceERKNS0_7contextEm +_ZN4sycl3_V13ext6oneapi12experimental15get_access_modeEPKvmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental15set_access_modeEPKvmNS3_19address_access_modeERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental16free_virtual_memEmmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental19get_mem_granularityERKNS0_6deviceERKNS0_7contextENS3_16granularity_modeE +_ZN4sycl3_V13ext6oneapi12experimental19get_mem_granularityERKNS0_7contextENS3_16granularity_modeE +_ZN4sycl3_V13ext6oneapi12experimental19reserve_virtual_memEmmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental5unmapEPKvmRKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_22bindless_image_samplerERKNS3_16image_descriptorERKNS0_5queueE @@ -3592,6 +3601,10 @@ _ZNK4sycl3_V114interop_handle16getNativeContextEv _ZNK4sycl3_V115device_selector13select_deviceEv _ZNK4sycl3_V116default_selectorclERKNS0_6deviceE _ZNK4sycl3_V120accelerator_selectorclERKNS0_6deviceE +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem10get_deviceEv +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem11get_contextEv 
+_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem3mapEmmNS3_19address_access_modeEm +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem4sizeEv _ZNK4sycl3_V13ext6oneapi12experimental4node14get_successorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node16get_predecessorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node8get_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index f27a5bbab639c..9b80d2eb69c8b 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -609,6 +609,10 @@ ??0kernel_id@_V1@sycl@@AEAA@PEBD@Z ??0kernel_id@_V1@sycl@@QEAA@$$QEAV012@@Z ??0kernel_id@_V1@sycl@@QEAA@AEBV012@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV012345@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVqueue@45@_K@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVdevice@45@AEBVcontext@45@_K@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z @@ -679,6 +683,7 @@ ??1kernel@_V1@sycl@@QEAA@XZ ??1kernel_bundle_plain@detail@_V1@sycl@@QEAA@XZ ??1kernel_id@_V1@sycl@@QEAA@XZ +??1physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1node@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1platform@_V1@sycl@@QEAA@XZ @@ -696,6 +701,8 @@ ??4?$OwnerLessBase@Vkernel@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z 
+??4?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z +??4?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z @@ -761,6 +768,8 @@ ??4kernel_bundle_plain@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@AEBV012@@Z +??4physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z +??4physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@AEBV012345@@Z ??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z ??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??4node@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z @@ -782,6 +791,7 @@ ??8kernel@_V1@sycl@@QEBA_NAEBV012@@Z ??8kernel_bundle_plain@detail@_V1@sycl@@QEBA_NAEBV0123@@Z ??8kernel_id@_V1@sycl@@QEBA_NAEBV012@@Z +??8physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_NAEBV012345@@Z ??8platform@_V1@sycl@@QEBA_NAEBV012@@Z ??8queue@_V1@sycl@@QEBA_NAEBV012@@Z ??8sampler@_V1@sycl@@QEBA_NAEBV012@@Z @@ -794,6 +804,7 @@ ??9kernel@_V1@sycl@@QEBA_NAEBV012@@Z ??9kernel_bundle_plain@detail@_V1@sycl@@QEBA_NAEBV0123@@Z ??9kernel_id@_V1@sycl@@QEBA_NAEBV012@@Z +??9physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_NAEBV012345@@Z ??9platform@_V1@sycl@@QEBA_NAEBV012@@Z ??9queue@_V1@sycl@@QEBA_NAEBV012@@Z ??9sampler@_V1@sycl@@QEBA_NAEBV012@@Z @@ -4038,6 +4049,8 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVkernel@34@@Z 
?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vkernel_id@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVkernel_id@34@@Z +?ext_oneapi_owner_before@?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@2oneapi@ext@34@@Z +?ext_oneapi_owner_before@?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVphysical_mem@experimental@oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vplatform@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVplatform@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vqueue@_V1@sycl@@@2oneapi@ext@34@@Z @@ -4075,12 +4088,14 @@ ?find_device_intersection@detail@_V1@sycl@@YA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@@Z ?free@_V1@sycl@@YAXPEAXAEBVcontext@12@AEBUcode_location@detail@12@@Z ?free@_V1@sycl@@YAXPEAXAEBVqueue@12@AEBUcode_location@detail@12@@Z -?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z +?free_virtual_mem@experimental@oneapi@ext@_V1@sycl@@YAX_K0AEBVcontext@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVqueue@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVqueue@45@@Z 
+?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVqueue@45@@Z +?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?frexp_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAH@Z ?frexp_impl@detail@_V1@sycl@@YAMMPEAH@Z ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z @@ -4170,6 +4185,7 @@ ?getStartTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ ?getType@handler@_V1@sycl@@AEAA?AW4CGTYPE@CG@detail@23@XZ ?getValueFromDynamicParameter@detail@_V1@sycl@@YAPEAXAEAVdynamic_parameter_base@1experimental@oneapi@ext@23@@Z +?get_access_mode@experimental@oneapi@ext@_V1@sycl@@YA?AW4address_access_mode@12345@PEBX_KAEBVcontext@45@@Z ?get_addressing_mode@sampler@_V1@sycl@@QEBA?AW4addressing_mode@23@XZ ?get_allocator_internal@buffer_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_allocator_internal@image_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ @@ -4189,10 +4205,12 @@ ?get_context@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVcontext@56@XZ ?get_context@kernel@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_context@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AVcontext@34@XZ +?get_context@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVcontext@56@XZ ?get_context@queue@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_coordinate_normalization_mode@sampler@_V1@sycl@@QEBA?AW4coordinate_normalization_mode@23@XZ ?get_count@image_plain@detail@_V1@sycl@@IEBA_KXZ ?get_descriptor@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAAEBUimage_descriptor@23456@XZ 
+?get_device@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ ?get_device@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ ?get_device@queue@_V1@sycl@@QEBA?AVdevice@23@XZ ?get_devices@context@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ @@ -4218,6 +4236,8 @@ ?get_kernel_ids@_V1@sycl@@YA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_kernel_ids@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_max_statement_size@stream@_V1@sycl@@QEBA_KXZ +?get_mem_granularity@experimental@oneapi@ext@_V1@sycl@@YA_KAEBVcontext@45@W4granularity_mode@12345@@Z +?get_mem_granularity@experimental@oneapi@ext@_V1@sycl@@YA_KAEBVdevice@45@AEBVcontext@45@W4granularity_mode@12345@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVdevice@45@AEBVcontext@45@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z ?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z @@ -4327,6 +4347,7 @@ ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z +?map@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBAPEAX_K0W4address_access_mode@23456@0@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z 
?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4389,6 +4410,7 @@ ?remquo_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@0PEAH@Z ?remquo_impl@detail@_V1@sycl@@YAMMMPEAH@Z ?remquo_impl@detail@_V1@sycl@@YANNNPEAH@Z +?reserve_virtual_mem@experimental@oneapi@ext@_V1@sycl@@YA_K_K0AEBVcontext@45@@Z ?reset@filter_selector@ONEAPI@_V1@sycl@@QEBAXXZ ?reset@filter_selector@oneapi@ext@_V1@sycl@@QEBAXXZ ?sampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@PEBXIAEBUcode_location@123@@Z @@ -4412,6 +4434,7 @@ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ ?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z ?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z +?set_access_mode@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KW4address_access_mode@12345@AEBVcontext@45@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXXZ ?set_final_data_internal@image_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z @@ -4427,6 +4450,7 @@ ?sincos_impl@detail@_V1@sycl@@YANNPEAN@Z ?single_task@handler@_V1@sycl@@QEAAXVkernel@23@@Z ?size@exception_list@_V1@sycl@@QEBA_KXZ +?size@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_KXZ ?size@stream@_V1@sycl@@QEBA_KXZ ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ ?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ @@ -4442,6 +4466,7 @@ ?sycl_category@_V1@sycl@@YAAEBVerror_category@std@@XZ ?throwIfActionIsCreated@handler@_V1@sycl@@AEAAXXZ ?throw_asynchronous@queue@_V1@sycl@@QEAAXXZ +?unmap@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z 
?unsampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@W4mode@access@23@PEBXIAEBUcode_location@123@@Z ?unsampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z ?unsampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index ca29b9bd6aa1e..b7fea5aae4ff9 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1353,6 +1353,61 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableRead( return PI_SUCCESS; } +inline pi_result +mock_piextVirtualMemGranularityGetInfo(pi_context, pi_device, + pi_virtual_mem_granularity_info, size_t, + void *, size_t *) { + return PI_SUCCESS; +} + +inline pi_result +mock_piextPhysicalMemCreate(pi_context, pi_device, size_t, + pi_physical_mem *ret_physical_mem) { + *ret_physical_mem = createDummyHandle(); + return PI_SUCCESS; +} + +inline pi_result mock_piextPhysicalMemRetain(pi_physical_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextPhysicalMemRelease(pi_physical_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemReserve(pi_context, const void *start, + size_t range_size, + void **ret_ptr) { + *ret_ptr = + start ? 
const_cast(start) : createDummyHandle(range_size); + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemFree(pi_context, const void *, size_t) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemMap(pi_context, const void *, size_t, + pi_physical_mem, size_t, + pi_virtual_access_flags) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemUnmap(pi_context, const void *, size_t) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemSetAccess(pi_context, const void *, size_t, + pi_virtual_access_flags) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemGetInfo(pi_context, const void *, size_t, + pi_virtual_mem_info, size_t, + void *, size_t *) { + return PI_SUCCESS; +} + inline pi_result mock_piextPluginGetOpaqueData(void *opaque_data_param, void **opaque_data_return) { return PI_SUCCESS; From 3040061eb25f209ed2cd4355f747ea7d936ad78d Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 1 Jul 2024 08:59:03 -0700 Subject: [PATCH 36/40] [SYCL] no exceptions leaking from destructors (#14273) Destructors are implicitly noexcept, so we must ensure they don't actually throw exceptions. No change to API or ABI with this PR. 
--- sycl/include/sycl/buffer.hpp | 8 ++- sycl/include/sycl/detail/common.hpp | 11 ++++ sycl/include/sycl/image.hpp | 13 ++++- sycl/include/syclcompat/device.hpp | 10 ++-- sycl/source/detail/context_impl.cpp | 38 +++++++------- sycl/source/detail/device_image_impl.hpp | 21 ++++---- sycl/source/detail/event_impl.cpp | 8 ++- sycl/source/detail/global_handler.cpp | 31 ++++++++---- sycl/source/detail/graph_impl.cpp | 58 ++++++++++++---------- sycl/source/detail/kernel_impl.cpp | 10 ++-- sycl/source/detail/pi_utils.hpp | 11 ++-- sycl/source/detail/program_impl.cpp | 12 +++-- sycl/source/detail/queue_impl.hpp | 38 +++++++------- sycl/source/detail/sampler_impl.cpp | 15 ++++-- sycl/source/detail/thread_pool.hpp | 8 ++- sycl/unittests/thread_safety/ThreadUtils.h | 8 ++- 16 files changed, 196 insertions(+), 104 deletions(-) diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 5dde105b678e6..32588de22c980 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -472,7 +472,13 @@ class buffer : public detail::buffer_plain, buffer &operator=(buffer &&rhs) = default; - ~buffer() { buffer_plain::handleRelease(); } + ~buffer() { + try { + buffer_plain::handleRelease(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~buffer", e); + } + } bool operator==(const buffer &rhs) const { return impl == rhs.impl; } diff --git a/sycl/include/sycl/detail/common.hpp b/sycl/include/sycl/detail/common.hpp index 1c940a21a7223..09c31ef76ef6d 100644 --- a/sycl/include/sycl/detail/common.hpp +++ b/sycl/include/sycl/detail/common.hpp @@ -368,6 +368,17 @@ static constexpr std::array RepeatValue(const T &Arg) { return RepeatValueHelper(Arg, std::make_index_sequence()); } +// Reports an exception caught in a destructor; expands to nothing if NDEBUG. +#ifndef NDEBUG +#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, e) \ + { \ + std::cerr << str << " " << e.what() << std::endl; \ + assert(false); \ + } +#else +#define
__SYCL_REPORT_EXCEPTION_TO_STREAM(str, e) +#endif + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/image.hpp b/sycl/include/sycl/image.hpp index 1239f65cdd259..2d0401764bbe9 100644 --- a/sycl/include/sycl/image.hpp +++ b/sycl/include/sycl/image.hpp @@ -954,7 +954,12 @@ class unsampled_image unsampled_image &operator=(unsampled_image &&rhs) = default; ~unsampled_image() { - common_base::unsampledImageDestructorNotification((void *)this->impl.get()); + try { + common_base::unsampledImageDestructorNotification( + (void *)this->impl.get()); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~unsampled_image", e); + } } bool operator==(const unsampled_image &rhs) const { @@ -1095,7 +1100,11 @@ class sampled_image sampled_image &operator=(sampled_image &&rhs) = default; ~sampled_image() { - common_base::sampledImageDestructorNotification((void *)this->impl.get()); + try { + common_base::sampledImageDestructorNotification((void *)this->impl.get()); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~sampled_image", e); + } } bool operator==(const sampled_image &rhs) const { diff --git a/sycl/include/syclcompat/device.hpp b/sycl/include/syclcompat/device.hpp index ed16a9b32bfa4..3e3e6cb77e71d 100644 --- a/sycl/include/syclcompat/device.hpp +++ b/sycl/include/syclcompat/device.hpp @@ -339,9 +339,13 @@ class device_ext : public sycl::device { public: device_ext() : sycl::device(), _ctx(*this) {} ~device_ext() { - std::lock_guard lock(m_mutex); - sycl::event::wait(_events); - _queues.clear(); + try { + std::lock_guard lock(m_mutex); + sycl::event::wait(_events); + _queues.clear(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~device_ext", e); + } } device_ext(const sycl::device &base, bool print_on_async_exceptions = false, bool in_order = true) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp 
index 388c312305d4a..add8bfa679b1a 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -145,23 +145,27 @@ cl_context context_impl::get() const { bool context_impl::is_host() const { return MHostContext; } context_impl::~context_impl() { - // Free all events associated with the initialization of device globals. - for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) - DeviceGlobalInitializer.second.ClearEvents(getPlugin()); - // Free all device_global USM allocations associated with this context. - for (const void *DeviceGlobal : MAssociatedDeviceGlobals) { - DeviceGlobalMapEntry *DGEntry = - detail::ProgramManager::getInstance().getDeviceGlobalEntry( - DeviceGlobal); - DGEntry->removeAssociatedResources(this); - } - for (auto LibProg : MCachedLibPrograms) { - assert(LibProg.second && "Null program must not be kept in the cache"); - getPlugin()->call(LibProg.second); - } - if (!MHostContext) { - // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call_nocheck(MContext); + try { + // Free all events associated with the initialization of device globals. + for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) + DeviceGlobalInitializer.second.ClearEvents(getPlugin()); + // Free all device_global USM allocations associated with this context. 
+ for (const void *DeviceGlobal : MAssociatedDeviceGlobals) { + DeviceGlobalMapEntry *DGEntry = + detail::ProgramManager::getInstance().getDeviceGlobalEntry( + DeviceGlobal); + DGEntry->removeAssociatedResources(this); + } + for (auto LibProg : MCachedLibPrograms) { + assert(LibProg.second && "Null program must not be kept in the cache"); + getPlugin()->call(LibProg.second); + } + if (!MHostContext) { + // TODO catch an exception and put it to list of asynchronous exceptions + getPlugin()->call(MContext); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~context_impl", e); } } diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index f21bf3ccd0185..eda511e340d10 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -300,15 +300,18 @@ class device_image_impl { } ~device_image_impl() { - - if (MProgram) { - const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); - Plugin->call(MProgram); - } - if (MSpecConstsBuffer) { - std::lock_guard Lock{MSpecConstAccessMtx}; - const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); - memReleaseHelper(Plugin, MSpecConstsBuffer); + try { + if (MProgram) { + const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); + Plugin->call(MProgram); + } + if (MSpecConstsBuffer) { + std::lock_guard Lock{MSpecConstAccessMtx}; + const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); + memReleaseHelper(Plugin, MSpecConstsBuffer); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~device_image_impl", e); } } diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index c7d245e5e91c0..63d60e41ac7e8 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -54,8 +54,12 @@ bool event_impl::is_host() { } event_impl::~event_impl() { - if (MEvent) - getPlugin()->call(MEvent); + try { + if 
(MEvent) + getPlugin()->call(MEvent); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~event_impl", e); + } } void event_impl::waitInternal(bool *Success) { diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 072a9628d6a6b..301125d9b5c93 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -54,14 +54,18 @@ class ObjectUsageCounter { MCounter++; } ~ObjectUsageCounter() { - if (!MModifyCounter) - return; - - LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); - MCounter--; - GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); - if (RTGlobalObjHandler) { - RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); + try { + if (!MModifyCounter) + return; + + LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); + MCounter--; + GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); + if (RTGlobalObjHandler) { + RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ObjectUsageCounter", e); } } @@ -234,12 +238,17 @@ void GlobalHandler::releaseDefaultContexts() { struct EarlyShutdownHandler { ~EarlyShutdownHandler() { + try { #ifdef _WIN32 - // on Windows we keep to the existing shutdown procedure - GlobalHandler::instance().releaseDefaultContexts(); + // on Windows we keep to the existing shutdown procedure + GlobalHandler::instance().releaseDefaultContexts(); #else - shutdown_early(); + shutdown_early(); #endif + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~EarlyShutdownHandler", + e); + } } }; diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 09ccef30dacd2..9ef8ce262932f 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -297,9 +297,13 @@ void exec_graph_impl::makePartitions() { } 
graph_impl::~graph_impl() { - clearQueues(); - for (auto &MemObj : MMemObjs) { - MemObj->markNoLongerBeingUsedInGraph(); + try { + clearQueues(); + for (auto &MemObj : MMemObjs) { + MemObj->markNoLongerBeingUsedInGraph(); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~graph_impl", e); } } @@ -784,34 +788,38 @@ exec_graph_impl::exec_graph_impl(sycl::context Context, } exec_graph_impl::~exec_graph_impl() { - const sycl::detail::PluginPtr &Plugin = - sycl::detail::getSyclObjImpl(MContext)->getPlugin(); - MSchedule.clear(); - // We need to wait on all command buffer executions before we can release - // them. - for (auto &Event : MExecutionEvents) { - Event->wait(Event); - } + try { + const sycl::detail::PluginPtr &Plugin = + sycl::detail::getSyclObjImpl(MContext)->getPlugin(); + MSchedule.clear(); + // We need to wait on all command buffer executions before we can release + // them. + for (auto &Event : MExecutionEvents) { + Event->wait(Event); + } - for (const auto &Partition : MPartitions) { - Partition->MSchedule.clear(); - for (const auto &Iter : Partition->MPiCommandBuffers) { - if (auto CmdBuf = Iter.second; CmdBuf) { + for (const auto &Partition : MPartitions) { + Partition->MSchedule.clear(); + for (const auto &Iter : Partition->MPiCommandBuffers) { + if (auto CmdBuf = Iter.second; CmdBuf) { + pi_result Res = Plugin->call_nocheck< + sycl::detail::PiApiKind::piextCommandBufferRelease>(CmdBuf); + (void)Res; + assert(Res == pi_result::PI_SUCCESS); + } + } + } + + for (auto &Iter : MCommandMap) { + if (auto Command = Iter.second; Command) { pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferRelease>(CmdBuf); + sycl::detail::PiApiKind::piextCommandBufferReleaseCommand>(Command); (void)Res; assert(Res == pi_result::PI_SUCCESS); } } - } - - for (auto &Iter : MCommandMap) { - if (auto Command = Iter.second; Command) { - pi_result Res = Plugin->call_nocheck< - 
sycl::detail::PiApiKind::piextCommandBufferReleaseCommand>(Command); - (void)Res; - assert(Res == pi_result::PI_SUCCESS); - } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~exec_graph_impl", e); } } diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 9c5a1851cd3b1..0696123e94450 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -75,9 +75,13 @@ kernel_impl::kernel_impl(ContextImplPtr Context, ProgramImplPtr ProgramImpl) : MContext(Context), MProgram(ProgramImpl->getHandleRef()) {} kernel_impl::~kernel_impl() { - // TODO catch an exception and put it to list of asynchronous exceptions - if (!is_host()) { - getPlugin()->call(MKernel); + try { + // TODO catch an exception and put it to list of asynchronous exceptions + if (!is_host()) { + getPlugin()->call(MKernel); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~kernel_impl", e); } } diff --git a/sycl/source/detail/pi_utils.hpp b/sycl/source/detail/pi_utils.hpp index 877cbd0d14e52..fa288c91fc583 100644 --- a/sycl/source/detail/pi_utils.hpp +++ b/sycl/source/detail/pi_utils.hpp @@ -31,9 +31,14 @@ struct OwnedPiEvent { MPlugin->call(*MEvent); } ~OwnedPiEvent() { - // Release the event if the ownership was not transferred. - if (MEvent.has_value()) - MPlugin->call(*MEvent); + try { + // Release the event if the ownership was not transferred. 
+ if (MEvent.has_value()) + MPlugin->call(*MEvent); + + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~OwnedPiEvent", e); + } } OwnedPiEvent(OwnedPiEvent &&Other) diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index d65f3163b961f..6952c6d45ebd2 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -207,10 +207,14 @@ program_impl::program_impl(ContextImplPtr Context, } program_impl::~program_impl() { - // TODO catch an exception and put it to list of asynchronous exceptions - if (!is_host() && MProgram != nullptr) { - const PluginPtr &Plugin = getPlugin(); - Plugin->call(MProgram); + try { + // TODO catch an exception and put it to list of asynchronous exceptions + if (!is_host() && MProgram != nullptr) { + const PluginPtr &Plugin = getPlugin(); + Plugin->call(MProgram); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~program_impl", e); } } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index db586fda5bf3e..4878134ec1e92 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -314,25 +314,29 @@ class queue_impl { } ~queue_impl() { - // The trace event created in the constructor should be active through the - // lifetime of the queue object as member variables when ABI breakage is - // allowed. This example shows MTraceEvent as a member variable. + try { + // The trace event created in the constructor should be active through the + // lifetime of the queue object as member variables when ABI breakage is + // allowed. This example shows MTraceEvent as a member variable. 
#if XPTI_ENABLE_INSTRUMENTATION - constexpr uint16_t NotificationTraceType = - static_cast(xpti::trace_point_type_t::queue_destroy); - if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) { - // Used cached information in member variables - xptiNotifySubscribers(MStreamID, NotificationTraceType, nullptr, - (xpti::trace_event_data_t *)MTraceEvent, - MInstanceID, - static_cast("queue_destroy")); - xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent); - } + constexpr uint16_t NotificationTraceType = + static_cast(xpti::trace_point_type_t::queue_destroy); + if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) { + // Used cached information in member variables + xptiNotifySubscribers(MStreamID, NotificationTraceType, nullptr, + (xpti::trace_event_data_t *)MTraceEvent, + MInstanceID, + static_cast("queue_destroy")); + xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent); + } #endif - throw_asynchronous(); - if (!MHostQueue) { - cleanup_fusion_cmd(); - getPlugin()->call(MQueues[0]); + throw_asynchronous(); + if (!MHostQueue) { + cleanup_fusion_cmd(); + getPlugin()->call(MQueues[0]); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~queue_impl", e); } } diff --git a/sycl/source/detail/sampler_impl.cpp b/sycl/source/detail/sampler_impl.cpp index c2af7884a164c..edca8eb1be025 100644 --- a/sycl/source/detail/sampler_impl.cpp +++ b/sycl/source/detail/sampler_impl.cpp @@ -40,11 +40,16 @@ sampler_impl::sampler_impl(cl_sampler clSampler, const context &syclContext) { } sampler_impl::~sampler_impl() { - std::lock_guard Lock(MMutex); - for (auto &Iter : MContextToSampler) { - // TODO catch an exception and add it to the list of asynchronous exceptions - const PluginPtr &Plugin = getSyclObjImpl(Iter.first)->getPlugin(); - Plugin->call(Iter.second); + try { + std::lock_guard Lock(MMutex); + for (auto &Iter : MContextToSampler) { + // TODO catch an exception and add it to the list of asynchronous + // exceptions + const 
PluginPtr &Plugin = getSyclObjImpl(Iter.first)->getPlugin(); + Plugin->call(Iter.second); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~sample_impl", e); } } diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 35adb98e9d570..304045389b53b 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -74,7 +74,13 @@ class ThreadPool { start(); } - ~ThreadPool() { finishAndWait(); } + ~ThreadPool() { + try { + finishAndWait(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ThreadPool", e); + } + } void finishAndWait() { MStop.store(true); diff --git a/sycl/unittests/thread_safety/ThreadUtils.h b/sycl/unittests/thread_safety/ThreadUtils.h index ccbca98d44e3f..4b40123ba1bb7 100644 --- a/sycl/unittests/thread_safety/ThreadUtils.h +++ b/sycl/unittests/thread_safety/ThreadUtils.h @@ -48,7 +48,13 @@ class ThreadPool { enqueueHelper(std::forward(funcs)...); } - ~ThreadPool() { wait(); } + ~ThreadPool() { + try { + wait(); + } catch (std::exception &e) { + std::cerr << "exception in ~ThreadPool" << e.what() << std::endl; + } + } private: template From c99522b08976c4a74bc06890ee46c158251feb46 Mon Sep 17 00:00:00 2001 From: Michael Toguchi Date: Mon, 1 Jul 2024 11:18:28 -0700 Subject: [PATCH 37/40] [Driver][SYCL][NewOffload] Fix arch settings for nvptx and amd (#14340) When compiling for -fsycl-targets values of nvptx64-nvidia-cuda and amdgcn-amd-gpu, the default arch behaviors were not applied to the compilation. 
Updates to do the following: - Add default of sm_50 for nvptx64 if not provided - Emit diagnostic if no arch provided for amd - Parse -Xsycl-target-backend for offload-arch values --- clang/lib/Driver/Driver.cpp | 42 +++++++++++++++++---- clang/test/Driver/sycl-offload-new-driver.c | 23 ++++++++--- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index b3143789c632b..37d00e130ac1e 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7781,12 +7781,28 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, for (auto *Arg : Args) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. std::unique_ptr ExtractedArg = nullptr; - if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && - ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { - Arg->claim(); - unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); - ExtractedArg = getOpts().ParseOneArg(Args, Index); - Arg = ExtractedArg.get(); + if (Kind == Action::OFK_SYCL) { + // For SYCL based offloading, we allow for -Xsycl-target-backend + // and -Xsycl-target-backend= for specifying options.
+ if (Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && + llvm::Triple(Arg->getValue(0)) == TC->getTriple()) { + Arg->claim(); + unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); + ExtractedArg = getOpts().ParseOneArg(Args, Index); + Arg = ExtractedArg.get(); + } else if (Arg->getOption().matches(options::OPT_Xsycl_backend)) { + unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); + ExtractedArg = getOpts().ParseOneArg(Args, Index); + Arg = ExtractedArg.get(); + } + } else { + if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && + ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { + Arg->claim(); + unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); + ExtractedArg = getOpts().ParseOneArg(Args, Index); + Arg = ExtractedArg.get(); + } } // Add or remove the seen architectures in order of appearance. If an @@ -7851,8 +7867,18 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(CudaArchToString(CudaArch::HIPDefault)); else if (Kind == Action::OFK_OpenMP) Archs.insert(StringRef()); - else if (Kind == Action::OFK_SYCL) - Archs.insert(StringRef()); + else if (Kind == Action::OFK_SYCL) { + // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. + // The default arch is set for NVPTX if not provided. For AMDGPU, emit + // an error as the user is responsible to set the arch. 
+ if (TC->getTriple().isNVPTX()) + Archs.insert(CudaArchToString(CudaArch::SM_50)); + else if (TC->getTriple().isAMDGPU()) + C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch) + << 1 << TC->getTriple().str(); + else + Archs.insert(StringRef()); + } } else { Args.ClaimAllArgs(options::OPT_offload_arch_EQ); Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ); diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index 90c4e7437ca64..a101b7b790a16 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -6,11 +6,11 @@ // OFFLOAD-NEW-DRIVER: 1: append-footer, {0}, c++, (host-sycl) // OFFLOAD-NEW-DRIVER: 2: preprocessor, {1}, c++-cpp-output, (host-sycl) // OFFLOAD-NEW-DRIVER: 3: compiler, {2}, ir, (host-sycl) -// OFFLOAD-NEW-DRIVER: 4: input, "[[INPUT]]", c++, (device-sycl) -// OFFLOAD-NEW-DRIVER: 5: preprocessor, {4}, c++-cpp-output, (device-sycl) -// OFFLOAD-NEW-DRIVER: 6: compiler, {5}, ir, (device-sycl) -// OFFLOAD-NEW-DRIVER: 7: backend, {6}, ir, (device-sycl) -// OFFLOAD-NEW-DRIVER: 8: offload, "device-sycl (nvptx64-nvidia-cuda)" {7}, ir +// OFFLOAD-NEW-DRIVER: 4: input, "[[INPUT]]", c++, (device-sycl, sm_50) +// OFFLOAD-NEW-DRIVER: 5: preprocessor, {4}, c++-cpp-output, (device-sycl, sm_50) +// OFFLOAD-NEW-DRIVER: 6: compiler, {5}, ir, (device-sycl, sm_50) +// OFFLOAD-NEW-DRIVER: 7: backend, {6}, ir, (device-sycl, sm_50) +// OFFLOAD-NEW-DRIVER: 8: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {7}, ir // OFFLOAD-NEW-DRIVER: 9: input, "[[INPUT]]", c++, (device-sycl) // OFFLOAD-NEW-DRIVER: 10: preprocessor, {9}, c++-cpp-output, (device-sycl) // OFFLOAD-NEW-DRIVER: 11: compiler, {10}, ir, (device-sycl) @@ -173,3 +173,16 @@ // WRAPPER_OPTIONS_BACKEND_AOT: clang-linker-wrapper{{.*}} "--host-triple=x86_64-unknown-linux-gnu" // WRAPPER_OPTIONS_BACKEND_AOT-SAME: "--gpu-tool-arg={{.*}}-backend-gpu-opt" // WRAPPER_OPTIONS_BACKEND_AOT-SAME: 
"--cpu-tool-arg={{.*}}-backend-cpu-opt" + +/// Verify arch settings for nvptx and amdgcn targets +// RUN: %clangxx -fsycl -### -fsycl-targets=amdgcn-amd-gpu -fno-sycl-libspirv \ +// RUN: -nocudalib --offload-new-driver \ +// RUN: -Xsycl-target-backend=amdgcn-amd-gpu --offload-arch=gfx600 \ +// RUN: %s 2>&1 \ +// RUN: | FileCheck -check-prefix AMD_ARCH %s +// AMD_ARCH: clang-offload-packager{{.*}} "--image=file={{.*}},triple=amdgcn-amd-gpu,arch=gfx600,kind=sycl,compile-opts=--offload-arch=gfx600" + +// RUN: %clangxx -fsycl -### -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fno-sycl-libspirv -nocudalib --offload-new-driver %s 2>&1 \ +// RUN: | FileCheck -check-prefix NVPTX_DEF_ARCH %s +// NVPTX_DEF_ARCH: clang-offload-packager{{.*}} "--image=file={{.*}},triple=nvptx64-nvidia-cuda,arch=sm_50,kind=sycl" From 92c9f01485db5b6d0f247a9dad9fc02cdf25926b Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Mon, 1 Jul 2024 15:40:59 -0400 Subject: [PATCH 38/40] [SYCL] Fix postcommit (#14373) Totally broken on all platforms right now. 
``` /__w/llvm/llvm/src/sycl/source/detail/physical_mem_impl.hpp:33:3: error: default label in switch which covers all enumeration values [-Werror,-Wcovered-switch-default] 33 | default: ``` and https://github.com/intel/llvm/issues/14372 Signed-off-by: Sarnie, Nick --- llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll | 3 +++ sycl/source/detail/physical_mem_impl.hpp | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll b/llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll index eb52a775d38ee..ebde02b7e51ae 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; https://github.com/intel/llvm/issues/14372 +; UNSUPPORTED: windows + ; CHECK-SPIRV: OpFNegate ; CHECK-SPIRV: OpFNegate ; CHECK-SPIRV: OpFNegate diff --git a/sycl/source/detail/physical_mem_impl.hpp b/sycl/source/detail/physical_mem_impl.hpp index 9fb38f1202257..0822692255379 100644 --- a/sycl/source/detail/physical_mem_impl.hpp +++ b/sycl/source/detail/physical_mem_impl.hpp @@ -30,9 +30,6 @@ inline sycl::detail::pi::PiVirtualAccessFlags AccessModeToVirtualAccessFlags( return PI_VIRTUAL_ACCESS_FLAG_RW; case ext::oneapi::experimental::address_access_mode::none: return 0; - default: - throw sycl::exception(make_error_code(errc::invalid), - "Invalid address_access_mode."); } } From 43286ab7b17cc9e628c8a4a74748995fb9ce3738 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Mon, 1 Jul 2024 17:25:18 -0400 Subject: [PATCH 39/40] [SYCL] Fix precommit (#14374) Fixes precommit after unsuccessful post-commit fix at https://github.com/intel/llvm/pull/14373. 
Signed-off-by: Nick Sarnie --- sycl/source/detail/physical_mem_impl.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/physical_mem_impl.hpp b/sycl/source/detail/physical_mem_impl.hpp index 0822692255379..e36830ba07bee 100644 --- a/sycl/source/detail/physical_mem_impl.hpp +++ b/sycl/source/detail/physical_mem_impl.hpp @@ -31,6 +31,8 @@ inline sycl::detail::pi::PiVirtualAccessFlags AccessModeToVirtualAccessFlags( case ext::oneapi::experimental::address_access_mode::none: return 0; } + throw sycl::exception(make_error_code(errc::invalid), + "Invalid address_access_mode."); } class physical_mem_impl { From 2e212e01d79484c319d432f94a4a6fdd5529e71f Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 1 Jul 2024 23:02:08 +0100 Subject: [PATCH 40/40] [CUDA][HIP] Fix host task mem migration and add pi entry point for urEnqueueNativeCommandExp (#14353) The SYCL RT assumes that for devices in the same context, no mem migration needs to occur across devices for a kernel launch or host task. However, a CUdeviceptr is relevant to a specific device, so mem migration must occur between devices in a ctx. If this assumption that the SYCL RT makes about native mems being accessible to all devices in a context does not hold, it must hand off the HT lambda to the plugin, so that the plugin can handle the necessary mem migration. This patch uses the new urEnqueueNativeCommandExp to execute the HT lambda, which takes care of mem migration implicitly in the plugin.
--- sycl/include/sycl/detail/host_task_impl.hpp | 2 + sycl/include/sycl/detail/pi.def | 3 ++ sycl/include/sycl/detail/pi.h | 25 ++++++++- sycl/plugins/cuda/pi_cuda.cpp | 10 ++++ sycl/plugins/hip/pi_hip.cpp | 10 ++++ sycl/plugins/level_zero/pi_level_zero.cpp | 10 ++++ sycl/plugins/native_cpu/pi_native_cpu.cpp | 10 ++++ sycl/plugins/opencl/pi_opencl.cpp | 10 ++++ sycl/plugins/unified_runtime/pi2ur.hpp | 30 ++++++++++- .../unified_runtime/pi_unified_runtime.cpp | 10 ++++ sycl/source/detail/scheduler/commands.cpp | 54 ++++++++++++++++--- .../interop-task-cuda-buffer-migrate.cpp | 9 ---- sycl/test/abi/pi_cuda_symbol_check.dump | 1 + sycl/test/abi/pi_hip_symbol_check.dump | 1 + sycl/test/abi/pi_level_zero_symbol_check.dump | 1 + sycl/test/abi/pi_nativecpu_symbol_check.dump | 1 + sycl/test/abi/pi_opencl_symbol_check.dump | 1 + sycl/unittests/helpers/PiMockPlugin.hpp | 8 +++ 18 files changed, 179 insertions(+), 17 deletions(-) diff --git a/sycl/include/sycl/detail/host_task_impl.hpp b/sycl/include/sycl/detail/host_task_impl.hpp index 1634269ac2f2b..42b49eab820c7 100644 --- a/sycl/include/sycl/detail/host_task_impl.hpp +++ b/sycl/include/sycl/detail/host_task_impl.hpp @@ -47,6 +47,8 @@ class HostTask { if (HPI) HPI->end(); } + + friend class DispatchHostTask; }; class CGHostTask : public CG { diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 3090b2d488ee0..f2acd7e897399 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -227,4 +227,7 @@ _PI_API(piextVirtualMemUnmap) _PI_API(piextVirtualMemSetAccess) _PI_API(piextVirtualMemGetInfo) +// Enqueue native command +_PI_API(piextEnqueueNativeCommand) + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index ce7d34ef75899..f541c3e4e89d2 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -195,9 +195,10 @@ // PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM device info descriptor, // 
_pi_virtual_mem_granularity_info enum, _pi_virtual_mem_info enum and // pi_virtual_access_flags bit flags. +// 15.55 Added piextEnqueueNativeCommand as well as associated types and enums #define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 54 +#define _PI_H_VERSION_MINOR 55 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -512,6 +513,8 @@ typedef enum { // Virtual memory support PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM = 0x2011E, + // Native enqueue + PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT = 0x2011F, } _pi_device_info; typedef enum { @@ -1279,6 +1282,7 @@ using pi_image_mem_handle = void *; using pi_interop_mem_handle = pi_uint64; using pi_interop_semaphore_handle = pi_uint64; using pi_physical_mem = _pi_physical_mem *; +using pi_enqueue_native_command_function = void (*)(pi_queue, void *); typedef struct { pi_image_channel_order image_channel_order; @@ -3201,6 +3205,25 @@ __SYCL_EXPORT pi_result piextSignalExternalSemaphore( pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); +/// API to enqueue work through a backend API such that the plugin can schedule +/// the backend API calls within its own DAG. 
+/// +/// \param command_queue is the queue instructed to signal +/// \param fn is the user submitted native function enqueueing work to a +/// backend API +/// \param data is the data that will be used in fn +/// \param num_mems is the number of mems in mem_list +/// \param mem_list is the list of mems that are used in fn +/// \param num_events_in_wait_list is the number of events in the wait list +/// \param event_wait_list is the list of events to wait on before this +/// operation +/// \param event is the returned event representing this operation +__SYCL_EXPORT pi_result piextEnqueueNativeCommand( + pi_queue command_queue, pi_enqueue_native_command_function fn, void *data, + pi_uint32 num_mems, const pi_mem *mem_list, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event); + typedef enum { _PI_SANITIZE_TYPE_NONE = 0x0, _PI_SANITIZE_TYPE_ADDRESS = 0x1, diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 1628b1537fae5..13b44ce9a701d 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1361,6 +1361,16 @@ pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, param_value_size_ret); } +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index c3324463690eb..074909bcd6736 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1364,6 +1364,16 @@ pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, param_value_size_ret); } +pi_result 
+piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index bab365effe85f..61ddafc09ccdc 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1562,6 +1562,16 @@ pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, ParamValueSizeRet); } +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + const char SupportedVersion[] = _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { // missing diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index 2276e9f78f7ea..e1f40fcbcd16b 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -1384,6 +1384,16 @@ pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, param_value_size_ret); } +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + 
Event); +} + // Initialize function table with stubs. #define _PI_API(api) \ (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1d340b5685f4e..c569adb4a8839 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1291,6 +1291,16 @@ pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, ParamValueSizeRet); } +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + pi_result piTearDown(void *PluginParameter) { return pi2ur::piTearDown(PluginParameter); } diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index f22e672d84423..3690c78d61b47 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1325,6 +1325,9 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT, UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT, + UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP) PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT, UR_DEVICE_INFO_ESIMD_SUPPORT) PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES, @@ -5722,7 +5725,6 @@ piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, HANDLE_ERRORS(urVirtualMemGranularityGetInfo(UrContext, UrDevice, InfoType, ParamValueSize, ParamValue, ParamValueSizeRet)); - return PI_SUCCESS; } @@ -5882,4 +5884,30 @@ inline pi_result piextVirtualMemGetInfo(pi_context Context, const 
void *Ptr, // Virtual Memory /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// Enqueue Native Command Extension +inline pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *MemList, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + auto UrQueue = reinterpret_cast(Queue); + auto UrFn = reinterpret_cast(Fn); + const ur_mem_handle_t *UrMemList = + reinterpret_cast(MemList); + const ur_event_handle_t *UrEventWaitList = + reinterpret_cast(EventWaitList); + ur_event_handle_t *UREvent = reinterpret_cast(Event); + + HANDLE_ERRORS(urEnqueueNativeCommandExp( + UrQueue, UrFn, Data, NumMems, UrMemList, nullptr /*pProperties*/, + NumEventsInWaitList, UrEventWaitList, UREvent)); + + return PI_SUCCESS; +} +// Enqueue Native Command Extension +/////////////////////////////////////////////////////////////////////////////// + } // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 7e268199bba77..59d6b27017eae 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -1447,6 +1447,16 @@ __SYCL_EXPORT pi_result piextSignalExternalSemaphore( EventWaitList, Event); } +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + // This interface is not in Unified Runtime currently __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) { PI_ASSERT(PluginInit, 
PI_ERROR_INVALID_VALUE); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6ea0fc569bced..fd467a5b178db 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -317,9 +317,23 @@ static void flushCrossQueueDeps(const std::vector &EventImpls, } } +namespace { + +struct EnqueueNativeCommandData { + sycl::interop_handle ih; + std::function func; +}; + +void InteropFreeFunc(pi_queue InteropQueue, void *InteropData) { + auto *Data = reinterpret_cast(InteropData); + return Data->func(Data->ih); +} +} // namespace + class DispatchHostTask { ExecCGCommand *MThisCmd; std::vector MReqToMem; + std::vector MReqPiMem; bool waitForEvents() const { std::map> @@ -365,8 +379,10 @@ class DispatchHostTask { public: DispatchHostTask(ExecCGCommand *ThisCmd, - std::vector ReqToMem) - : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)) {} + std::vector ReqToMem, + std::vector ReqPiMem) + : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)), + MReqPiMem(std::move(ReqPiMem)) {} void operator()() const { assert(MThisCmd->getCG().getType() == CG::CGTYPE::CodeplayHostTask); @@ -402,8 +418,32 @@ class DispatchHostTask { interop_handle IH{MReqToMem, HostTask.MQueue, HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; - - HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), IH); + // TODO: should all the backends that support this entry point use this + // for host task? 
+ auto &Queue = HostTask.MQueue; + bool NativeCommandSupport = false; + Queue->getPlugin()->call( + detail::getSyclObjImpl(Queue->get_device())->getHandleRef(), + PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT, + sizeof(NativeCommandSupport), &NativeCommandSupport, nullptr); + if (NativeCommandSupport) { + EnqueueNativeCommandData CustomOpData{ + IH, HostTask.MHostTask->MInteropTask}; + + // We are assuming that we have already synchronized with the HT's + // dependent events, and that the user will synchronize before the end + // of the HT lambda. As such we don't pass in any events, or ask for + // one back. + // + // This entry point is needed in order to migrate memory across + // devices in the same context for CUDA and HIP backends + Queue->getPlugin()->call( + HostTask.MQueue->getHandleRef(), InteropFreeFunc, &CustomOpData, + MReqPiMem.size(), MReqPiMem.data(), 0, nullptr, nullptr); + } else { + HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), + IH); + } } else HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo()); } catch (...) 
{ @@ -3121,13 +3161,14 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } std::vector ReqToMem; + std::vector ReqPiMem; if (HostTask->MHostTask->isInteropTask()) { // Extract the Mem Objects for all Requirements, to ensure they are // available if a user asks for them inside the interop task scope const std::vector &HandlerReq = HostTask->getRequirements(); - auto ReqToMemConv = [&ReqToMem, HostTask](Requirement *Req) { + auto ReqToMemConv = [&ReqToMem, &ReqPiMem, HostTask](Requirement *Req) { const std::vector &AllocaCmds = Req->MSYCLMemObj->MRecord->MAllocaCommands; @@ -3137,6 +3178,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); + ReqPiMem.emplace_back(MemArg); return; } @@ -3158,7 +3200,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { copySubmissionCodeLocation(); MQueue->getThreadPool().submit( - DispatchHostTask(this, std::move(ReqToMem))); + DispatchHostTask(this, std::move(ReqToMem), std::move(ReqPiMem))); MShouldCompleteEventIfPossible = false; diff --git a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp index b577fe16d458d..fee893b9f7529 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp @@ -1,7 +1,4 @@ // REQUIRES: cuda -// XFAIL: cuda -// -// FIXME: this is broken with a multi device context // // RUN: %{build} -o %t.out -lcuda // RUN: %{run} %t.out @@ -31,12 +28,6 @@ int main() { platform(gpu_selector_v).get_devices(info::device_type::gpu); std::cout << Devices.size() << " devices found" << std::endl; - if (Devices.size() == 1) { - // Since this is XFAIL for Devices.size() > 1 we need to return failure if - // test can't run - return 1; - } - context C(Devices); int Index = 0; diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump 
b/sycl/test/abi/pi_cuda_symbol_check.dump index e6b19e97d1b87..4808f55c487cd 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -114,6 +114,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index 530ad95722494..2c421ad2fec55 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -114,6 +114,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 93cd4c4de10bb..7e8fc7500f4a4 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -113,6 +113,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index c63f579ca6b53..10f19aac80652 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -114,6 +114,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 8807d1647ebdc..c0e1b76c4d04f 100644 --- 
a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -113,6 +113,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index b7fea5aae4ff9..4e41a88c14544 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1569,6 +1569,14 @@ inline pi_result mock_piextCommandBufferAdviseUSM( return PI_SUCCESS; } +inline pi_result mock_piextEnqueueNativeCommand(pi_queue, + void (*)(pi_queue, void *), + void *, uint32_t, + const pi_mem *, pi_uint32, + const pi_event *, pi_event *) { + return PI_SUCCESS; +} + inline pi_result mock_piTearDown(void *PluginParameter) { return PI_SUCCESS; } inline pi_result mock_piPluginGetLastError(char **message) {