Merge branch 'sycl' into fp-model

intel · Sep 23, 2024 · dd64f97 · dd64f97
2 parents cfa3a25 + 82e08b5
commit dd64f97
Show file tree

Hide file tree

Showing 112 changed files with 1,290 additions and 1,205 deletions.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12486,9 +12486,8 @@ def err_sycl_function_attribute_mismatch : Error<
 def err_sycl_x_y_z_arguments_must_be_one : Error<
   "all %0 attribute arguments must be '1' when the %1 attribute argument is '0'">;
 def err_sycl_attribute_internal_decl
-    : Error<"%0 attribute cannot be applied to a "
-            "static %select{function|variable}1 or %select{function|variable}1 "
-            "in an anonymous namespace">;
+    : Error<"%0 attribute cannot be applied to a %select{function|variable}1"
+            " without external linkage">;
 def err_sycl_attribute_not_device_global
     : Error<"%0 attribute can only be applied to 'device_global' variables">;
 def err_fpga_attribute_incorrect_variable

diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -938,8 +938,17 @@ void CudaToolChain::addClangTargetOptions(
           DeviceOffloadingKind == Action::OFK_Cuda) &&
          "Only OpenMP, SYCL or CUDA offloading kinds are supported for NVIDIA GPUs.");
 
-  CC1Args.append(
-      {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
+  // If we are compiling SYCL kernels for Nvidia GPUs, we do not support Cuda
+  // device code compatability, hence we do not set Cuda mode in that instance.
+  if (DeviceOffloadingKind == Action::OFK_SYCL) {
+    toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
+                                                  CC1Args);
+
+    if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt))
+      CC1Args.push_back("-fcuda-prec-sqrt");
+  } else {
+    CC1Args.append(
+        {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
 
     // Unsized function arguments used for variadics were introduced in CUDA-9.0
     // We still do not support generating code that actually uses variadic
@@ -948,18 +957,10 @@ void CudaToolChain::addClangTargetOptions(
     if (CudaInstallation.version() >= CudaVersion::CUDA_90)
       CC1Args.push_back("-fcuda-allow-variadic-functions");
 
-    if (DriverArgs.hasArg(options::OPT_fsycl)) {
-      // Add these flags for .cu SYCL compilation.
+    // Add these flags for .cu SYCL compilation.
+    if (DeviceOffloadingKind == Action::OFK_Cuda &&
+        DriverArgs.hasArg(options::OPT_fsycl))
       CC1Args.append({"-std=c++17", "-fsycl-is-host"});
-    }
-
-  if (DeviceOffloadingKind == Action::OFK_SYCL) {
-    toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
-                                                  CC1Args);
-
-    if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt)) {
-      CC1Args.push_back("-fcuda-prec-sqrt");
-    }
   }
 
   auto NoLibSpirv = DriverArgs.hasArg(options::OPT_fno_sycl_libspirv) ||

diff --git a/clang/test/Driver/sycl.c b/clang/test/Driver/sycl.c
@@ -1,6 +1,3 @@
-// Failing on Windows - temporarily disable
-// REQUIRES: system-linux
-
 // RUN: %clang -### -fsycl -c %s 2>&1 | FileCheck %s --check-prefix=ENABLED
 // RUN: %clang -### -fsycl %s 2>&1 | FileCheck %s --check-prefix=ENABLED
 // RUN: %clang -### -fno-sycl -fsycl %s 2>&1 | FileCheck %s --check-prefix=ENABLED
@@ -107,7 +104,7 @@
 // Test with a bad argument is expected to fail
 // RUN: not %clang -fsycl-help=foo %s 2>&1 | FileCheck %s --check-prefix=SYCL-HELP-BADARG
 // RUN: %clang -### -fsycl-help=gen %s 2>&1 | FileCheck %s --check-prefix=SYCL-HELP-GEN
-// RUN: env PATH=%t-sycl-dir %clang -### -fsycl-help=fpga %s 2>&1 | FileCheck %s --check-prefixes=SYCL-HELP-FPGA,SYCL-HELP-FPGA-OUT -DDIR=%t-sycl-dir
+// RUN: env "PATH=%t-sycl-dir%{pathsep}%PATH%" %clang -### -fsycl-help=fpga %s 2>&1 | FileCheck %s --check-prefixes=SYCL-HELP-FPGA,SYCL-HELP-FPGA-OUT -DDIR=%t-sycl-dir
 // RUN: %clang -### -fsycl-help=x86_64 %s 2>&1 | FileCheck %s --check-prefix=SYCL-HELP-CPU
 // RUN: %clang -### -fsycl-help %s 2>&1 | FileCheck %s --check-prefixes=SYCL-HELP-GEN,SYCL-HELP-FPGA,SYCL-HELP-CPU
 // SYCL-HELP-BADARG: unsupported argument 'foo' to option '-fsycl-help='

diff --git a/clang/test/Preprocessor/sycl-macro.cpp b/clang/test/Preprocessor/sycl-macro.cpp
@@ -6,7 +6,10 @@
 // RUNx: %clang_cc1 %s -fsycl-id-queries-fit-in-int -fsycl-is-device -E -dM -fms-compatibility | FileCheck --check-prefix=CHECK-MSVC %s
 // RUN: %clang_cc1 -fno-sycl-id-queries-fit-in-int %s -E -dM | FileCheck \
 // RUN: --check-prefix=CHECK-NO-SYCL_FIT_IN_INT %s
-// RUN: %clang_cc1 %s  -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-CUDA %s
+// RUN: %clang_cc1 %s  -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fsycl-is-device -E -dM | FileCheck \
+// RUN: --check-prefix=CHECK-CUDA %s -DARCH_CODE=800
+// RUN: %clangxx %s -fsycl -nocudalib -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -E -dM | FileCheck \
+// RUN: --check-prefix=CHECK-CUDA-SYCL-DRIVER %s
 // RUN: %clang_cc1 %s  -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-HIP %s
 
 // RUN: %clang_cc1 %s  -triple nvptx64-nvidia-cuda -target-cpu sm_90a -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-CUDA-FEATURE %s
@@ -32,8 +35,10 @@
 // CHECK-NO-SYCL_FIT_IN_INT-NOT:#define __SYCL_ID_QUERIES_FIT_IN_INT__ 1
 // CHECK-SYCL-ID:#define __SYCL_ID_QUERIES_FIT_IN_INT__ 1
 
-// CHECK-CUDA:#define __SYCL_CUDA_ARCH__ 800
-// CHECK-CUDA-NOT:#define __CUDA_ARCH__ 800
+// CHECK-CUDA:#define __SYCL_CUDA_ARCH__ [[ARCH_CODE]]
+// CHECK-CUDA-NOT:#define __CUDA_ARCH__ {{[0-9]+}}
+
+// CHECK-CUDA-SYCL-DRIVER-NOT: #define __CUDA_ARCH__ {{[0-9]+}}
 
 // CHECK-HIP:#define __CUDA_ARCH__ 0
 

diff --git a/clang/test/SemaSYCL/device-indirectly-callable-attr.cpp b/clang/test/SemaSYCL/device-indirectly-callable-attr.cpp
@@ -12,16 +12,21 @@ int N;
 void
 bar() {}
 
-[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a static function or function in an anonymous namespace}}
+[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a function without external linkage}}
 static void
 func1() {}
 
 namespace {
-[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a static function or function in an anonymous namespace}}
+[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a function without external linkage}}
 void
 func2() {}
+
+struct UnnX {};
 }
 
+[[intel::device_indirectly_callable]] // expected-error {{'device_indirectly_callable' attribute cannot be applied to a function without external linkage}}
+void func4(UnnX) {}
+
 class A {
   [[intel::device_indirectly_callable]] A() {}
 

diff --git a/clang/test/SemaSYCL/device_global_external.cpp b/clang/test/SemaSYCL/device_global_external.cpp
@@ -5,7 +5,7 @@
 using namespace sycl::ext::oneapi;
 
 SYCL_EXTERNAL device_global<int> glob;
-// expected-error@+1{{'sycl_device' attribute cannot be applied to a static variable or variable in an anonymous namespace}}
+// expected-error@+1{{'sycl_device' attribute cannot be applied to a variable without external linkage}}
 SYCL_EXTERNAL static device_global<float> static_glob;
 
 namespace foo {
@@ -20,10 +20,15 @@ struct RandomStruct {
 SYCL_EXTERNAL RandomStruct S;
 
 namespace {
-// expected-error@+1{{'sycl_device' attribute cannot be applied to a static variable or variable in an anonymous namespace}}
+// expected-error@+1{{'sycl_device' attribute cannot be applied to a variable without external linkage}}
 SYCL_EXTERNAL device_global<int> same_name;
+
+struct UnnX {};
 } // namespace
 
+// expected-error@+1{{'sycl_device' attribute cannot be applied to a variable without external linkage}}
+SYCL_EXTERNAL device_global<UnnX> dg_x;
+
 // expected-error@+1{{'sycl_device' attribute can only be applied to 'device_global' variables}}
 SYCL_EXTERNAL int AAA;
 

diff --git a/clang/test/SemaSYCL/sycl-device.cpp b/clang/test/SemaSYCL/sycl-device.cpp
@@ -12,14 +12,19 @@ int N;
 __attribute__((sycl_device(3))) // expected-error {{'sycl_device' attribute takes no arguments}}
 void bar() {}
 
-__attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a static function or function in an anonymous namespace}}
+__attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a function without external linkage}}
 static void func1() {}
 
 namespace {
-  __attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a static function or function in an anonymous namespace}}
+  __attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a function without external linkage}}
   void func2() {}
+
+  struct UnnX {};
 }
 
+__attribute__((sycl_device)) // expected-error {{'sycl_device' attribute cannot be applied to a function without external linkage}}
+  void func4(UnnX) {}
+
 class A {
   __attribute__((sycl_device))
   A() {}

diff --git a/opencl/CMakeLists.txt b/opencl/CMakeLists.txt
@@ -20,8 +20,8 @@ set(OCL_LOADER_REPO
 
 # Repo tags/hashes
 
-set(OCL_HEADERS_TAG 9ddb236e6eb3cf844f9e2f81677e1045f9bf838e)
-set(OCL_LOADER_TAG 9a3e962f16f5097d2054233ad8b6dad51b6f41b7)
+set(OCL_HEADERS_TAG 542d7a8f65ecfd88b38de35d8b10aa67b36b33b2)
+set(OCL_LOADER_TAG 3d27d7ca04d29fabe608a2372ce693601bcc4e81)
 
 # OpenCL Headers
 if(NOT OpenCL_HEADERS)

diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
@@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 185149248dd257bd37482aac43307a136204c051
-  # Merge: 2af159d4 d619bcd1
+  # commit 9ca3ec7a9c1d2f4a362d7e5add103b30271a8a55
+  # Merge: 7384e2d7 59e5e405
   # Author: Piotr Balcer <piotr.balcer@intel.com>
-  # Date:   Thu Sep 19 11:02:27 2024 +0200
-  #    Merge pull request #1934 from yingcong-wu/yc/0806-exclude-shadow-from-coredump
-  #    [DeviceSanitizer] Exclude shadow memory from coredump file for CPU device.
-  set(UNIFIED_RUNTIME_TAG 185149248dd257bd37482aac43307a136204c051)
+  # Date:   Mon Sep 23 10:58:51 2024 +0200
+  #   Merge pull request #2113 from oneapi-src/revert-1698-counter-based-2 
+  #   Revert "[L0] Phase 2 of Counter-Based Event Implementation"
+  set(UNIFIED_RUNTIME_TAG 9ca3ec7a9c1d2f4a362d7e5add103b30271a8a55)
 
   set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
   # Due to the use of dependentloadflag and no installer for UMF and hwloc we need

diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc
@@ -401,9 +401,14 @@ of the link:sycl_ext_intel_matrix.asciidoc[sycl_ext_intel_matrix]
 
 Besides the `Group` and the `joint_matrix` arguments,
 `joint_matrix_apply` takes a C++ Callable object which is invoked once
-for each element of the matrix. This callable object must be invocable
-with a single parameter of type `T&`. Commonly, applications pass a
-lambda expression.
+for each element of the matrix. There are two cases: (1) one matrix is
+passed, (2) two matrices are passed.
+
+===== Unary Operation
+In this case, `joint_matrix_apply` takes one `joint_matrix`
+argument. The callable object must be invocable with a single
+parameter of type `T&`. Commonly, applications pass a lambda
+expression.
 
 ```c++
 namespace sycl::ext::oneapi::experimental::matrix {
@@ -427,6 +432,39 @@ joint_matrix_apply(sg, C, [=](T &x) {
 });
 ```
 
+===== Binary Operation
+In this case, `joint_matrix_apply` takes two `joint_matrix` arguments:
+`jm0` and `jm1` that have the same `use`, number of rows, number of
+columns, and `layout`. `jm0` and `jm1` can be read-only, write-only,
+or read and write arguments. The callable object must be invocable
+with two parameters `x` and `y` of types `T0&` amd `T1&`, where `x` is
+an element from `jm0` and `y` is an element from `jm1`. Moreover, `x`
+and `y` are guaranteed to have identical coordinates in their
+respective matrices. Commonly, applications pass a lambda expression.
+
+```c++
+namespace sycl::ext::oneapi::experimental::matrix {
+
+template<typename Group, typename T0, typename T1, use Use,
+  size_t Rows, size_t Cols, layout Layout, typename F>
+void joint_matrix_apply(Group g,
+          joint_matrix<Group, T0, Use, Rows, Cols, Layout>& jm0,
+          joint_matrix<Group, T1, Use, Rows, Cols, Layout>& jm1,
+	  F&& func);
+
+} // namespace sycl::ext::oneapi::experimental::matrix
+```
+
+In the following example, every element `x` of the matrix `C` is
+multiplied by `alpha`. The result is returned into the element `y` of
+the matrix `D`.
+
+```c++
+joint_matrix_apply(sg, C, D, [=](const T &x, T &y) {
+    y = x * alpha;
+});
+```
+
 ==== Prefetch
 
 ```c++

diff --git a/sycl/include/sycl/detail/ur.hpp b/sycl/include/sycl/detail/ur.hpp
@@ -103,8 +103,8 @@ __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext,
                                              void *user_data);
 }
 
-class plugin;
-using PluginPtr = std::shared_ptr<plugin>;
+class Adapter;
+using AdapterPtr = std::shared_ptr<Adapter>;
 
 // TODO: To be removed as this was only introduced for esimd which was removed.
 template <sycl::backend BE>
@@ -126,11 +126,11 @@ void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName);
 void *getURLoaderLibrary();
 
 // Performs UR one-time initialization.
-std::vector<PluginPtr> &
+std::vector<AdapterPtr> &
 initializeUr(ur_loader_config_handle_t LoaderConfig = nullptr);
 
-// Get the plugin serving given backend.
-template <backend BE> const PluginPtr &getPlugin();
+// Get the adapter serving given backend.
+template <backend BE> const AdapterPtr &getAdapter();
 
 // The SYCL_UR_TRACE sets what we will trace.
 // This is a bit-mask of various things we'd want to trace.

diff --git a/sycl/include/sycl/ext/oneapi/experimental/address_cast.hpp b/sycl/include/sycl/ext/oneapi/experimental/address_cast.hpp
@@ -27,14 +27,10 @@ multi_ptr<ElementType, Space, access::decorated::no>
 static_address_cast(ElementType *Ptr) {
   using ret_ty = multi_ptr<ElementType, Space, access::decorated::no>;
 #ifdef __SYCL_DEVICE_ONLY__
-  // TODO: Remove this restriction.
   static_assert(std::is_same_v<ElementType, remove_decoration_t<ElementType>>,
-                "The extension expect undecorated raw pointers only!");
+                "The extension expects undecorated raw pointers only!");
   if constexpr (Space == generic_space) {
     // Undecorated raw pointer is in generic AS already, no extra casts needed.
-    // Note for future, for `OpPtrCastToGeneric`, `Pointer` must point to one of
-    // `Storage Classes` that doesn't include `Generic`, so this will have to
-    // remain a special case even if the restriction above is lifted.
     return ret_ty(Ptr);
   } else {
     auto CastPtr = sycl::detail::spirv::GenericCastToPtr<Space>(Ptr);
@@ -60,9 +56,8 @@ multi_ptr<ElementType, Space, access::decorated::no>
 dynamic_address_cast(ElementType *Ptr) {
   using ret_ty = multi_ptr<ElementType, Space, access::decorated::no>;
 #ifdef __SYCL_DEVICE_ONLY__
-  // TODO: Remove this restriction.
   static_assert(std::is_same_v<ElementType, remove_decoration_t<ElementType>>,
-                "The extension expect undecorated raw pointers only!");
+                "The extension expects undecorated raw pointers only!");
   if constexpr (Space == generic_space) {
     return ret_ty(Ptr);
   } else {

diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp
@@ -768,16 +768,16 @@ class __SYCL_EXPORT handler {
       int ArgIndex);
 
   /* The kernel passed to StoreLambda can take an id, an item or an nd_item as
-   * its argument. Since esimd plugin directly invokes the kernel (doesn’t use
-   * piKernelSetArg), the kernel argument type must be known to the plugin.
-   * However, passing kernel argument type to the plugin requires changing ABI
+   * its argument. Since esimd adapter directly invokes the kernel (doesn’t use
+   * urKernelSetArg), the kernel argument type must be known to the adapter.
+   * However, passing kernel argument type to the adapter requires changing ABI
    * in HostKernel class. To overcome this problem, helpers below wrap the
    * “original” kernel with a functor that always takes an nd_item as argument.
    * A functor is used instead of a lambda because extractArgsAndReqsFromLambda
    * needs access to the “original” kernel and keeps references to its internal
    * data, i.e. the kernel passed as argument cannot be local in scope. The
    * functor itself is again encapsulated in a std::function since functor’s
-   * type is unknown to the plugin.
+   * type is unknown to the adapter.
    */
 
   // For 'id, item w/wo offset, nd_item' kernel arguments