Merge remote-tracking branch 'origin/sycl' into convert-to-ocl-helper

intel · Feb 9, 2024 · f1444d8 · f1444d8
2 parents 4708c42 + 73d3473
commit f1444d8
Show file tree

Hide file tree

Showing 80 changed files with 474 additions and 287 deletions.
diff --git a/sycl/include/sycl/builtins_preview.hpp b/sycl/include/sycl/builtins_preview.hpp
@@ -137,7 +137,14 @@ auto builtin_marray_impl(FuncTy F, const Ts &...x) {
   marray<ret_elem_type, T::size()> Res;
   constexpr auto N = T::size();
   for (size_t I = 0; I < N / 2; ++I) {
-    auto PartialRes = F(to_vec2(x, I * 2)...);
+    auto PartialRes = [&]() {
+      using elem_ty = get_elem_type_t<T>;
+      if constexpr (std::is_integral_v<elem_ty>)
+        return F(to_vec2(x, I * 2)
+                     .template as<vec<get_fixed_sized_int_t<elem_ty>, 2>>()...);
+      else
+        return F(to_vec2(x, I * 2)...);
+    }();
     std::memcpy(&Res[I * 2], &PartialRes, sizeof(decltype(PartialRes)));
   }
   if (N % 2)

diff --git a/sycl/include/sycl/builtins_utils_scalar.hpp b/sycl/include/sycl/builtins_utils_scalar.hpp
@@ -128,6 +128,17 @@ template <size_t Size> struct get_unsigned_int_by_size {
 template <typename T> struct same_size_unsigned_int {
   using type = typename get_unsigned_int_by_size<sizeof(T)>::type;
 };
+template <typename T>
+using same_size_unsigned_int_t = typename same_size_unsigned_int<T>::type;
+
+template <typename T> struct get_fixed_sized_int {
+  static_assert(std::is_integral_v<T>);
+  using type =
+      std::conditional_t<std::is_signed_v<T>, same_size_signed_int_t<T>,
+                         same_size_unsigned_int_t<T>>;
+};
+template <typename T>
+using get_fixed_sized_int_t = typename get_fixed_sized_int<T>::type;
 
 // Utility trait for getting an upsampled integer type.
 // NOTE: For upsampling we look for an integer of double the size of the

diff --git a/sycl/include/sycl/detail/builtins/helper_macros.hpp b/sycl/include/sycl/detail/builtins/helper_macros.hpp
@@ -48,6 +48,11 @@
   FOR_EACH4_A6(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3,    \
                ARG4, ARG5, ARG6)                                               \
   BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG7)
+#define FOR_EACH4_A8(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2,    \
+                     ARG3, ARG4, ARG5, ARG6, ARG7, ARG8)                       \
+  FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3,    \
+               ARG4, ARG5, ARG6, ARG7)                                         \
+  BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG8)
 #define FOR_EACH4_A11(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2,   \
                       ARG3, ARG4, ARG5, ARG6, ARG7, ARG8, ARG9, ARG10, ARG11)  \
   FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3,    \
@@ -169,6 +174,9 @@
   unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long
 // 11 types
 #define INTEGER_TYPES SIGNED_TYPES, UNSIGNED_TYPES
+// 8 types
+#define FIXED_WIDTH_INTEGER_TYPES                                              \
+  int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t
 
 #define DEVICE_IMPL_TEMPLATE_CUSTOM_DELEGATE(                                  \
     NUM_ARGS, NAME, ENABLER, DELEGATOR, NS, /*SCALAR_VEC_IMPL*/...)            \

diff --git a/sycl/include/sycl/detail/builtins/integer_functions.inc b/sycl/include/sycl/detail/builtins/integer_functions.inc
@@ -16,9 +16,10 @@ namespace detail {
 template <typename T>
 struct integer_elem_type
     : std::bool_constant<
-          check_type_in_v<get_elem_type_t<T>, char, signed char, short, int,
-                          long, long long, unsigned char, unsigned short,
-                          unsigned int, unsigned long, unsigned long long>> {};
+          (is_vec_or_swizzle_v<T> &&
+           check_type_in_v<get_elem_type_t<T>, FIXED_WIDTH_INTEGER_TYPES>) ||
+          (!is_vec_or_swizzle_v<T> &&
+           check_type_in_v<get_elem_type_t<T>, INTEGER_TYPES>)> {};
 template <typename T>
 struct suint32_elem_type
     : std::bool_constant<

diff --git a/sycl/include/sycl/detail/builtins/relational_functions.inc b/sycl/include/sycl/detail/builtins/relational_functions.inc
@@ -15,10 +15,12 @@ inline namespace _V1 {
 namespace detail {
 template <typename T>
 struct bitselect_elem_type
-    : std::bool_constant<check_type_in_v<
-          get_elem_type_t<T>, float, double, half, char, signed char, short,
-          int, long, long long, unsigned char, unsigned short, unsigned int,
-          unsigned long, unsigned long long>> {};
+    : std::bool_constant<
+          check_type_in_v<get_elem_type_t<T>, FP_TYPES> ||
+          (is_vec_or_swizzle_v<T> &&
+           check_type_in_v<get_elem_type_t<T>, FIXED_WIDTH_INTEGER_TYPES>) ||
+          (!is_vec_or_swizzle_v<T> &&
+           check_type_in_v<get_elem_type_t<T>, INTEGER_TYPES>)> {};
 
 template <typename T>
 struct rel_ret_traits

diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp
@@ -205,6 +205,8 @@ class __SYCL_EXPORT kernel_bundle_plain {
   void get_specialization_constant_impl(const char *SpecName,
                                         void *Value) const noexcept;
 
+  // \returns a bool value which indicates if specialization constant was set to
+  // a value different from default value.
   bool is_specialization_constant_set(const char *SpecName) const noexcept;
 
   detail::KernelBundleImplPtr impl;

diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -57,13 +57,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
   include(FetchContent)
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 9babc4d092a92c1036791d26ef328e5eeaf19803
-  # Merge: 3be8f205 90498ec5
+  # commit 47102cb2b275472054803a9399b5b977ef210b23
+  # Merge: 186bfb9d 2390664d
   # Author: aarongreig <aaron.greig@codeplay.com>
-  # Date:   Thu Feb 8 15:44:54 2024 +0000
-  #     Merge pull request #1321 from pbalcer/adapter-compute-constructor
-  #     [L0] move adapter init into its constructor from urAdapterGet
-  set(UNIFIED_RUNTIME_TAG 9babc4d092a92c1036791d26ef328e5eeaf19803)
+  # Date:   Fri Feb 9 12:21:47 2024 +0000
+  #     Merge pull request #1328 from Bensuo/maxime/events-reset-bugfix
+  #     [EXP][CMDBUF] Reset events for multiple submissions (bugfix)
+  set(UNIFIED_RUNTIME_TAG 47102cb2b275472054803a9399b5b977ef210b23)
 
   if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
     set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")

diff --git a/sycl/source/builtins/host_helper_macros.hpp b/sycl/source/builtins/host_helper_macros.hpp
@@ -56,6 +56,9 @@
 #define EXPORT_VEC(NUM_ARGS, NAME, TYPE, VL)                                   \
   EXPORT_VEC_NS(NUM_ARGS, NAME, sycl, TYPE, VL)
 
+#define EXPORT_VEC_1_16_IMPL(NUM_ARGS, NAME, NS, TYPE)                         \
+  FOR_VEC_1_16(EXPORT_VEC_NS, NUM_ARGS, NAME, NS, TYPE)
+
 #define EXPORT_SCALAR_AND_VEC_1_16_IMPL(NUM_ARGS, NAME, NS, TYPE)              \
   EXPORT_SCALAR_NS(NUM_ARGS, NAME, NS, TYPE)                                   \
   FOR_VEC_1_16(EXPORT_VEC_NS, NUM_ARGS, NAME, NS, TYPE)
@@ -69,8 +72,12 @@
 
 #define EXPORT_SCALAR_AND_VEC_1_16_NS(NUM_ARGS, NAME, NS, ...)                 \
   FOR_EACH3(EXPORT_SCALAR_AND_VEC_1_16_IMPL, NUM_ARGS, NAME, NS, __VA_ARGS__)
+#define EXPORT_VEC_1_16_NS(NUM_ARGS, NAME, NS, ...)                            \
+  FOR_EACH3(EXPORT_VEC_1_16_IMPL, NUM_ARGS, NAME, NS, __VA_ARGS__)
 #define EXPORT_SCALAR_AND_VEC_1_16(NUM_ARGS, NAME, ...)                        \
   EXPORT_SCALAR_AND_VEC_1_16_NS(NUM_ARGS, NAME, sycl, __VA_ARGS__)
+#define EXPORT_VEC_1_16(NUM_ARGS, NAME, ...)                                   \
+  EXPORT_VEC_1_16_NS(NUM_ARGS, NAME, sycl, __VA_ARGS__)
 
 #define EXPORT_SCALAR_AND_VEC_2_4(NUM_ARGS, NAME, ...)                         \
   FOR_EACH2(EXPORT_SCALAR_AND_VEC_2_4_IMPL, NUM_ARGS, NAME, __VA_ARGS__)

diff --git a/sycl/source/builtins/integer_functions.cpp b/sycl/source/builtins/integer_functions.cpp
@@ -76,7 +76,8 @@ namespace sycl {
 inline namespace _V1 {
 #define BUILTIN_GENINT(NUM_ARGS, NAME, IMPL)                                   \
   HOST_IMPL(NAME, IMPL)                                                        \
-  EXPORT_SCALAR_AND_VEC_1_16(NUM_ARGS, NAME, INTEGER_TYPES)
+  FOR_EACH2(EXPORT_SCALAR, NUM_ARGS, NAME, INTEGER_TYPES)                      \
+  EXPORT_VEC_1_16(NUM_ARGS, NAME, FIXED_WIDTH_INTEGER_TYPES)
 #define BUILTIN_GENINT_SU(NUM_ARGS, NAME, IMPL)                                \
   BUILTIN_GENINT(NUM_ARGS, NAME, IMPL)
 

diff --git a/sycl/source/builtins/relational_functions.cpp b/sycl/source/builtins/relational_functions.cpp
@@ -103,6 +103,7 @@ HOST_IMPL(bitselect, [](auto x, auto y, auto z) {
   assert((ures & std::numeric_limits<utype>::max()) == ures);
   return bit_cast<T0>(static_cast<utype>(ures));
 })
-EXPORT_SCALAR_AND_VEC_1_16(THREE_ARGS, bitselect, INTEGER_TYPES, FP_TYPES)
+FOR_EACH2(EXPORT_SCALAR, THREE_ARGS, bitselect, INTEGER_TYPES, FP_TYPES)
+EXPORT_VEC_1_16(THREE_ARGS, bitselect, FIXED_WIDTH_INTEGER_TYPES, FP_TYPES)
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp
@@ -50,6 +50,8 @@ class device_image_impl {
     unsigned int CompositeOffset = 0;
     unsigned int Size = 0;
     unsigned int BlobOffset = 0;
+    // Indicates if the specialization constant was set to a value which is
+    // different from the default value.
     bool IsSet = false;
   };
 
@@ -61,7 +63,8 @@ class device_image_impl {
                     sycl::detail::pi::PiProgram Program)
       : MBinImage(BinImage), MContext(std::move(Context)),
         MDevices(std::move(Devices)), MState(State), MProgram(Program),
-        MKernelIDs(std::move(KernelIDs)) {
+        MKernelIDs(std::move(KernelIDs)),
+        MSpecConstsDefValBlob(getSpecConstsDefValBlob()) {
     updateSpecConstSymMap();
   }
 
@@ -74,6 +77,7 @@ class device_image_impl {
       : MBinImage(BinImage), MContext(std::move(Context)),
         MDevices(std::move(Devices)), MState(State), MProgram(Program),
         MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob),
+        MSpecConstsDefValBlob(getSpecConstsDefValBlob()),
         MSpecConstSymMap(SpecConstMap) {}
 
   bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
@@ -152,6 +156,21 @@ class device_image_impl {
     std::vector<SpecConstDescT> &Descs =
         MSpecConstSymMap[std::string{SpecName}];
     for (SpecConstDescT &Desc : Descs) {
+      // If there is a default value of the specialization constant and it is
+      // the same as the value which is being set then do nothing, runtime is
+      // going to handle this case just like if only the default value of the
+      // specialization constant was provided.
+      if (MSpecConstsDefValBlob.size() &&
+          (std::memcmp(MSpecConstsDefValBlob.begin() + Desc.BlobOffset,
+                       static_cast<const char *>(Value) + Desc.CompositeOffset,
+                       Desc.Size) == 0)) {
+        // Now we have default value, so reset to false.
+        Desc.IsSet = false;
+        continue;
+      }
+
+      // Value of the specialization constant is set to a value which is
+      // different from the default value.
       Desc.IsSet = true;
       std::memcpy(MSpecConstsBlob.data() + Desc.BlobOffset,
                   static_cast<const char *>(Value) + Desc.CompositeOffset,
@@ -161,19 +180,20 @@ class device_image_impl {
 
   void get_specialization_constant_raw_value(const char *SpecName,
                                              void *ValueRet) const noexcept {
-    assert(is_specialization_constant_set(SpecName));
+    bool IsSet = is_specialization_constant_set(SpecName);
     // Lock the mutex to prevent when one thread in the middle of writing a
     // new value while another thread is reading the value to pass it to
     // JIT compiler.
     const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);
-
+    assert(IsSet || MSpecConstsDefValBlob.size());
     // operator[] can't be used here, since it's not marked as const
     const std::vector<SpecConstDescT> &Descs =
         MSpecConstSymMap.at(std::string{SpecName});
     for (const SpecConstDescT &Desc : Descs) {
-
+      auto Blob =
+          IsSet ? MSpecConstsBlob.data() : MSpecConstsDefValBlob.begin();
       std::memcpy(static_cast<char *>(ValueRet) + Desc.CompositeOffset,
-                  MSpecConstsBlob.data() + Desc.BlobOffset, Desc.Size);
+                  Blob + Desc.BlobOffset, Desc.Size);
     }
   }
 
@@ -293,16 +313,30 @@ class device_image_impl {
   }
 
 private:
+  // Get the specialization constant default value blob.
+  ByteArray getSpecConstsDefValBlob() const {
+    if (!MBinImage)
+      return ByteArray(nullptr, 0);
+
+    // Get default values for specialization constants.
+    const RTDeviceBinaryImage::PropertyRange &SCDefValRange =
+        MBinImage->getSpecConstantsDefaultValues();
+    if (!SCDefValRange.size())
+      return ByteArray(nullptr, 0);
+
+    ByteArray DefValDescriptors =
+        DeviceBinaryProperty(*SCDefValRange.begin()).asByteArray();
+    // First 8 bytes are consumed by the size of the property.
+    DefValDescriptors.dropBytes(8);
+    return DefValDescriptors;
+  }
+
   void updateSpecConstSymMap() {
     if (MBinImage) {
       const RTDeviceBinaryImage::PropertyRange &SCRange =
           MBinImage->getSpecConstants();
       using SCItTy = RTDeviceBinaryImage::PropertyRange::ConstIterator;
 
-      // get default values for specialization constants
-      const RTDeviceBinaryImage::PropertyRange &SCDefValRange =
-          MBinImage->getSpecConstantsDefaultValues();
-
       // This variable is used to calculate spec constant value offset in a
       // flat byte array.
       unsigned BlobOffset = 0;
@@ -341,16 +375,13 @@ class device_image_impl {
       }
       MSpecConstsBlob.resize(BlobOffset);
 
-      bool HasDefaultValues = SCDefValRange.begin() != SCDefValRange.end();
-
-      if (HasDefaultValues) {
-        ByteArray DefValDescriptors =
-            DeviceBinaryProperty(*SCDefValRange.begin()).asByteArray();
-        assert(DefValDescriptors.size() - 8 == MSpecConstsBlob.size() &&
+      if (MSpecConstsDefValBlob.size()) {
+        assert(MSpecConstsDefValBlob.size() == MSpecConstsBlob.size() &&
                "Specialization constant default value blob do not have the "
                "expected size.");
-        std::uninitialized_copy(&DefValDescriptors[8],
-                                &DefValDescriptors[8] + MSpecConstsBlob.size(),
+        std::uninitialized_copy(MSpecConstsDefValBlob.begin(),
+                                MSpecConstsDefValBlob.begin() +
+                                    MSpecConstsBlob.size(),
                                 MSpecConstsBlob.data());
       }
     }
@@ -372,6 +403,9 @@ class device_image_impl {
   // Binary blob which can have values of all specialization constants in the
   // image
   std::vector<unsigned char> MSpecConstsBlob;
+  // Binary blob which can have default values of all specialization constants
+  // in the image.
+  const ByteArray MSpecConstsDefValBlob;
   // Buffer containing binary blob which can have values of all specialization
   // constants in the image, it is using for storing non-native specialization
   // constants

diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp
@@ -963,6 +963,7 @@ Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG(
       for (auto Ev = Events.begin(); Ev != Events.end();) {
         auto *EvDepCmd = static_cast<Command *>((*Ev)->getCommand());
         if (!EvDepCmd) {
+          ++Ev;
           continue;
         }
         // Handle event dependencies on any commands part of another active

diff --git a/sycl/test-e2e/ESIMD/unified_memory_api/Inputs/atomic_update.hpp b/sycl/test-e2e/ESIMD/unified_memory_api/Inputs/atomic_update.hpp
@@ -715,10 +715,13 @@ bool test_int_types_and_sizes(queue q, const Config &cfg) {
         q, cfg);
     passed &= test_int_types<64, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(
         q, cfg);
-    passed &= test_int_types<12, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(
-        q, cfg);
-    passed &= test_int_types<33, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(
-        q, cfg);
+    // non power of two values are supported only in newer driver.
+    // TODO: Enable this when the new driver reaches test infrastructure
+    // (v27556).
+#if 0
+    passed &= test_int_types<12, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(q, cfg);
+    passed &= test_int_types<33, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(q, cfg);
+#endif
   }
 
   return passed;
@@ -739,8 +742,13 @@ bool test_fp_types_and_sizes(queue q, const Config &cfg) {
     passed &= test_fp_types<32, Op, UseMask, UseLSCFeatures, UseAcc>(q, cfg);
     passed &= test_fp_types<64, Op, UseMask, UseLSCFeatures, UseAcc>(q, cfg);
 
+    // non power of two values are supported only in newer driver.
+    // TODO: Enable this when the new driver reaches test infrastructure
+    // (v27556).
+#if 0
     passed &= test_fp_types<12, Op, UseMask, UseLSCFeatures, UseAcc>(q, cfg);
     passed &= test_fp_types<35, Op, UseMask, UseLSCFeatures, UseAcc>(q, cfg);
+#endif
   }
   return passed;
 }

diff --git a/sycl/test-e2e/ESIMD/unified_memory_api/Inputs/atomic_update_slm.hpp b/sycl/test-e2e/ESIMD/unified_memory_api/Inputs/atomic_update_slm.hpp
@@ -612,9 +612,14 @@ bool test_fp_types(queue q) {
 
   if constexpr (Features == TestFeatures::DG2 ||
                 Features == TestFeatures::PVC) {
-    if (q.get_device().has(sycl::aspect::atomic64) &&
-        q.get_device().has(sycl::aspect::fp64)) {
-      passed &= run_test<UseAcc, double, N, Op, UseMask>(q);
+    // TODO: fmin/fmax/fcmpxchg for double requires a newer GPU driver.
+    if constexpr (!std::is_same_v<Op<double, N>, ImplLSCFmax<double, N>> &&
+                  !std::is_same_v<Op<double, N>, ImplLSCFmin<double, N>> &&
+                  !std::is_same_v<Op<double, N>, ImplLSCFcmpwr<double, N>>) {
+      if (q.get_device().has(sycl::aspect::atomic64) &&
+          q.get_device().has(sycl::aspect::fp64)) {
+        passed &= run_test<UseAcc, double, N, Op, UseMask>(q);
+      }
     }
   }
   return passed;
@@ -628,6 +633,7 @@ bool test_int_types_and_sizes(queue q) {
   passed &= test_int_types<2, Op, UseMask, Features, UseAcc, SignMask>(q);
   passed &= test_int_types<4, Op, UseMask, Features, UseAcc, SignMask>(q);
   passed &= test_int_types<8, Op, UseMask, Features, UseAcc, SignMask>(q);
+  // TODO: N=16 and N=32 does not pass on Gen12 with mask due to older driver.
   if (UseMask && Features == TestFeatures::Generic &&
       esimd_test::isGPUDriverGE(q, esimd_test::GPUDriverOS::LinuxAndWindows,
                                 "26918", "101.4953", false)) {
@@ -639,8 +645,13 @@ bool test_int_types_and_sizes(queue q) {
   if constexpr (Features == TestFeatures::DG2 ||
                 Features == TestFeatures::PVC) {
     passed &= test_int_types<64, Op, UseMask, Features, UseAcc, SignMask>(q);
+    // non power of two values are supported only in newer driver.
+    // TODO: Enable this when the new driver reaches test infrastructure
+    // (v27556).
+#if 0
     passed &= test_int_types<12, Op, UseMask, Features, UseAcc, SignMask>(q);
     passed &= test_int_types<33, Op, UseMask, Features, UseAcc, SignMask>(q);
+#endif
   }
 
   return passed;
@@ -661,8 +672,13 @@ bool test_fp_types_and_sizes(queue q) {
   if constexpr (Features == TestFeatures::DG2 ||
                 Features == TestFeatures::PVC) {
     passed &= test_fp_types<64, Op, UseMask, Features, UseAcc>(q);
+    // non power of two values are supported only in newer driver.
+    // TODO: Enable this when the new driver reaches test infrastructure
+    // (v27556).
+#if 0
     passed &= test_fp_types<33, Op, UseMask, Features, UseAcc>(q);
     passed &= test_fp_types<65, Op, UseMask, Features, UseAcc>(q);
+#endif
   }
   return passed;
 }