Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/sycl' into convert-to-ocl-helper
Browse files Browse the repository at this point in the history
  • Loading branch information
aelovikov-intel committed Feb 9, 2024
2 parents 4708c42 + 73d3473 commit f1444d8
Show file tree
Hide file tree
Showing 80 changed files with 474 additions and 287 deletions.
9 changes: 8 additions & 1 deletion sycl/include/sycl/builtins_preview.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,14 @@ auto builtin_marray_impl(FuncTy F, const Ts &...x) {
marray<ret_elem_type, T::size()> Res;
constexpr auto N = T::size();
for (size_t I = 0; I < N / 2; ++I) {
auto PartialRes = F(to_vec2(x, I * 2)...);
auto PartialRes = [&]() {
using elem_ty = get_elem_type_t<T>;
if constexpr (std::is_integral_v<elem_ty>)
return F(to_vec2(x, I * 2)
.template as<vec<get_fixed_sized_int_t<elem_ty>, 2>>()...);
else
return F(to_vec2(x, I * 2)...);
}();
std::memcpy(&Res[I * 2], &PartialRes, sizeof(decltype(PartialRes)));
}
if (N % 2)
Expand Down
11 changes: 11 additions & 0 deletions sycl/include/sycl/builtins_utils_scalar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,17 @@ template <size_t Size> struct get_unsigned_int_by_size {
template <typename T> struct same_size_unsigned_int {
using type = typename get_unsigned_int_by_size<sizeof(T)>::type;
};
template <typename T>
using same_size_unsigned_int_t = typename same_size_unsigned_int<T>::type;

template <typename T> struct get_fixed_sized_int {
static_assert(std::is_integral_v<T>);
using type =
std::conditional_t<std::is_signed_v<T>, same_size_signed_int_t<T>,
same_size_unsigned_int_t<T>>;
};
template <typename T>
using get_fixed_sized_int_t = typename get_fixed_sized_int<T>::type;

// Utility trait for getting an upsampled integer type.
// NOTE: For upsampling we look for an integer of double the size of the
Expand Down
8 changes: 8 additions & 0 deletions sycl/include/sycl/detail/builtins/helper_macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@
FOR_EACH4_A6(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
ARG4, ARG5, ARG6) \
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG7)
#define FOR_EACH4_A8(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
ARG3, ARG4, ARG5, ARG6, ARG7, ARG8) \
FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
ARG4, ARG5, ARG6, ARG7) \
BASE_CASE(FIXED1, FIXED2, FIXED3, FIXED4, ARG8)
#define FOR_EACH4_A11(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, \
ARG3, ARG4, ARG5, ARG6, ARG7, ARG8, ARG9, ARG10, ARG11) \
FOR_EACH4_A7(BASE_CASE, FIXED1, FIXED2, FIXED3, FIXED4, ARG1, ARG2, ARG3, \
Expand Down Expand Up @@ -169,6 +174,9 @@
unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long
// 11 types
#define INTEGER_TYPES SIGNED_TYPES, UNSIGNED_TYPES
// 8 types
#define FIXED_WIDTH_INTEGER_TYPES \
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t

#define DEVICE_IMPL_TEMPLATE_CUSTOM_DELEGATE( \
NUM_ARGS, NAME, ENABLER, DELEGATOR, NS, /*SCALAR_VEC_IMPL*/...) \
Expand Down
7 changes: 4 additions & 3 deletions sycl/include/sycl/detail/builtins/integer_functions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ namespace detail {
template <typename T>
struct integer_elem_type
: std::bool_constant<
check_type_in_v<get_elem_type_t<T>, char, signed char, short, int,
long, long long, unsigned char, unsigned short,
unsigned int, unsigned long, unsigned long long>> {};
(is_vec_or_swizzle_v<T> &&
check_type_in_v<get_elem_type_t<T>, FIXED_WIDTH_INTEGER_TYPES>) ||
(!is_vec_or_swizzle_v<T> &&
check_type_in_v<get_elem_type_t<T>, INTEGER_TYPES>)> {};
template <typename T>
struct suint32_elem_type
: std::bool_constant<
Expand Down
10 changes: 6 additions & 4 deletions sycl/include/sycl/detail/builtins/relational_functions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ inline namespace _V1 {
namespace detail {
template <typename T>
struct bitselect_elem_type
: std::bool_constant<check_type_in_v<
get_elem_type_t<T>, float, double, half, char, signed char, short,
int, long, long long, unsigned char, unsigned short, unsigned int,
unsigned long, unsigned long long>> {};
: std::bool_constant<
check_type_in_v<get_elem_type_t<T>, FP_TYPES> ||
(is_vec_or_swizzle_v<T> &&
check_type_in_v<get_elem_type_t<T>, FIXED_WIDTH_INTEGER_TYPES>) ||
(!is_vec_or_swizzle_v<T> &&
check_type_in_v<get_elem_type_t<T>, INTEGER_TYPES>)> {};

template <typename T>
struct rel_ret_traits
Expand Down
2 changes: 2 additions & 0 deletions sycl/include/sycl/kernel_bundle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ class __SYCL_EXPORT kernel_bundle_plain {
void get_specialization_constant_impl(const char *SpecName,
void *Value) const noexcept;

// \returns a bool value which indicates if specialization constant was set to
// a value different from default value.
bool is_specialization_constant_set(const char *SpecName) const noexcept;

detail::KernelBundleImplPtr impl;
Expand Down
12 changes: 6 additions & 6 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 9babc4d092a92c1036791d26ef328e5eeaf19803
# Merge: 3be8f205 90498ec5
# commit 47102cb2b275472054803a9399b5b977ef210b23
# Merge: 186bfb9d 2390664d
# Author: aarongreig <aaron.greig@codeplay.com>
# Date: Thu Feb 8 15:44:54 2024 +0000
# Merge pull request #1321 from pbalcer/adapter-compute-constructor
# [L0] move adapter init into its constructor from urAdapterGet
set(UNIFIED_RUNTIME_TAG 9babc4d092a92c1036791d26ef328e5eeaf19803)
# Date: Fri Feb 9 12:21:47 2024 +0000
# Merge pull request #1328 from Bensuo/maxime/events-reset-bugfix
# [EXP][CMDBUF] Reset events for multiple submissions (bugfix)
set(UNIFIED_RUNTIME_TAG 47102cb2b275472054803a9399b5b977ef210b23)

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
7 changes: 7 additions & 0 deletions sycl/source/builtins/host_helper_macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@
#define EXPORT_VEC(NUM_ARGS, NAME, TYPE, VL) \
EXPORT_VEC_NS(NUM_ARGS, NAME, sycl, TYPE, VL)

#define EXPORT_VEC_1_16_IMPL(NUM_ARGS, NAME, NS, TYPE) \
FOR_VEC_1_16(EXPORT_VEC_NS, NUM_ARGS, NAME, NS, TYPE)

#define EXPORT_SCALAR_AND_VEC_1_16_IMPL(NUM_ARGS, NAME, NS, TYPE) \
EXPORT_SCALAR_NS(NUM_ARGS, NAME, NS, TYPE) \
FOR_VEC_1_16(EXPORT_VEC_NS, NUM_ARGS, NAME, NS, TYPE)
Expand All @@ -69,8 +72,12 @@

#define EXPORT_SCALAR_AND_VEC_1_16_NS(NUM_ARGS, NAME, NS, ...) \
FOR_EACH3(EXPORT_SCALAR_AND_VEC_1_16_IMPL, NUM_ARGS, NAME, NS, __VA_ARGS__)
#define EXPORT_VEC_1_16_NS(NUM_ARGS, NAME, NS, ...) \
FOR_EACH3(EXPORT_VEC_1_16_IMPL, NUM_ARGS, NAME, NS, __VA_ARGS__)
#define EXPORT_SCALAR_AND_VEC_1_16(NUM_ARGS, NAME, ...) \
EXPORT_SCALAR_AND_VEC_1_16_NS(NUM_ARGS, NAME, sycl, __VA_ARGS__)
#define EXPORT_VEC_1_16(NUM_ARGS, NAME, ...) \
EXPORT_VEC_1_16_NS(NUM_ARGS, NAME, sycl, __VA_ARGS__)

#define EXPORT_SCALAR_AND_VEC_2_4(NUM_ARGS, NAME, ...) \
FOR_EACH2(EXPORT_SCALAR_AND_VEC_2_4_IMPL, NUM_ARGS, NAME, __VA_ARGS__)
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/builtins/integer_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ namespace sycl {
inline namespace _V1 {
#define BUILTIN_GENINT(NUM_ARGS, NAME, IMPL) \
HOST_IMPL(NAME, IMPL) \
EXPORT_SCALAR_AND_VEC_1_16(NUM_ARGS, NAME, INTEGER_TYPES)
FOR_EACH2(EXPORT_SCALAR, NUM_ARGS, NAME, INTEGER_TYPES) \
EXPORT_VEC_1_16(NUM_ARGS, NAME, FIXED_WIDTH_INTEGER_TYPES)
#define BUILTIN_GENINT_SU(NUM_ARGS, NAME, IMPL) \
BUILTIN_GENINT(NUM_ARGS, NAME, IMPL)

Expand Down
3 changes: 2 additions & 1 deletion sycl/source/builtins/relational_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ HOST_IMPL(bitselect, [](auto x, auto y, auto z) {
assert((ures & std::numeric_limits<utype>::max()) == ures);
return bit_cast<T0>(static_cast<utype>(ures));
})
EXPORT_SCALAR_AND_VEC_1_16(THREE_ARGS, bitselect, INTEGER_TYPES, FP_TYPES)
FOR_EACH2(EXPORT_SCALAR, THREE_ARGS, bitselect, INTEGER_TYPES, FP_TYPES)
EXPORT_VEC_1_16(THREE_ARGS, bitselect, FIXED_WIDTH_INTEGER_TYPES, FP_TYPES)
} // namespace _V1
} // namespace sycl
68 changes: 51 additions & 17 deletions sycl/source/detail/device_image_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class device_image_impl {
unsigned int CompositeOffset = 0;
unsigned int Size = 0;
unsigned int BlobOffset = 0;
// Indicates if the specialization constant was set to a value which is
// different from the default value.
bool IsSet = false;
};

Expand All @@ -61,7 +63,8 @@ class device_image_impl {
sycl::detail::pi::PiProgram Program)
: MBinImage(BinImage), MContext(std::move(Context)),
MDevices(std::move(Devices)), MState(State), MProgram(Program),
MKernelIDs(std::move(KernelIDs)) {
MKernelIDs(std::move(KernelIDs)),
MSpecConstsDefValBlob(getSpecConstsDefValBlob()) {
updateSpecConstSymMap();
}

Expand All @@ -74,6 +77,7 @@ class device_image_impl {
: MBinImage(BinImage), MContext(std::move(Context)),
MDevices(std::move(Devices)), MState(State), MProgram(Program),
MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob),
MSpecConstsDefValBlob(getSpecConstsDefValBlob()),
MSpecConstSymMap(SpecConstMap) {}

bool has_kernel(const kernel_id &KernelIDCand) const noexcept {
Expand Down Expand Up @@ -152,6 +156,21 @@ class device_image_impl {
std::vector<SpecConstDescT> &Descs =
MSpecConstSymMap[std::string{SpecName}];
for (SpecConstDescT &Desc : Descs) {
// If there is a default value of the specialization constant and it is
// the same as the value which is being set then do nothing, runtime is
// going to handle this case just like if only the default value of the
// specialization constant was provided.
if (MSpecConstsDefValBlob.size() &&
(std::memcmp(MSpecConstsDefValBlob.begin() + Desc.BlobOffset,
static_cast<const char *>(Value) + Desc.CompositeOffset,
Desc.Size) == 0)) {
// Now we have default value, so reset to false.
Desc.IsSet = false;
continue;
}

// Value of the specialization constant is set to a value which is
// different from the default value.
Desc.IsSet = true;
std::memcpy(MSpecConstsBlob.data() + Desc.BlobOffset,
static_cast<const char *>(Value) + Desc.CompositeOffset,
Expand All @@ -161,19 +180,20 @@ class device_image_impl {

void get_specialization_constant_raw_value(const char *SpecName,
void *ValueRet) const noexcept {
assert(is_specialization_constant_set(SpecName));
bool IsSet = is_specialization_constant_set(SpecName);
// Lock the mutex to prevent when one thread in the middle of writing a
// new value while another thread is reading the value to pass it to
// JIT compiler.
const std::lock_guard<std::mutex> SpecConstLock(MSpecConstAccessMtx);

assert(IsSet || MSpecConstsDefValBlob.size());
// operator[] can't be used here, since it's not marked as const
const std::vector<SpecConstDescT> &Descs =
MSpecConstSymMap.at(std::string{SpecName});
for (const SpecConstDescT &Desc : Descs) {

auto Blob =
IsSet ? MSpecConstsBlob.data() : MSpecConstsDefValBlob.begin();
std::memcpy(static_cast<char *>(ValueRet) + Desc.CompositeOffset,
MSpecConstsBlob.data() + Desc.BlobOffset, Desc.Size);
Blob + Desc.BlobOffset, Desc.Size);
}
}

Expand Down Expand Up @@ -293,16 +313,30 @@ class device_image_impl {
}

private:
// Get the specialization constant default value blob.
ByteArray getSpecConstsDefValBlob() const {
if (!MBinImage)
return ByteArray(nullptr, 0);

// Get default values for specialization constants.
const RTDeviceBinaryImage::PropertyRange &SCDefValRange =
MBinImage->getSpecConstantsDefaultValues();
if (!SCDefValRange.size())
return ByteArray(nullptr, 0);

ByteArray DefValDescriptors =
DeviceBinaryProperty(*SCDefValRange.begin()).asByteArray();
// First 8 bytes are consumed by the size of the property.
DefValDescriptors.dropBytes(8);
return DefValDescriptors;
}

void updateSpecConstSymMap() {
if (MBinImage) {
const RTDeviceBinaryImage::PropertyRange &SCRange =
MBinImage->getSpecConstants();
using SCItTy = RTDeviceBinaryImage::PropertyRange::ConstIterator;

// get default values for specialization constants
const RTDeviceBinaryImage::PropertyRange &SCDefValRange =
MBinImage->getSpecConstantsDefaultValues();

// This variable is used to calculate spec constant value offset in a
// flat byte array.
unsigned BlobOffset = 0;
Expand Down Expand Up @@ -341,16 +375,13 @@ class device_image_impl {
}
MSpecConstsBlob.resize(BlobOffset);

bool HasDefaultValues = SCDefValRange.begin() != SCDefValRange.end();

if (HasDefaultValues) {
ByteArray DefValDescriptors =
DeviceBinaryProperty(*SCDefValRange.begin()).asByteArray();
assert(DefValDescriptors.size() - 8 == MSpecConstsBlob.size() &&
if (MSpecConstsDefValBlob.size()) {
assert(MSpecConstsDefValBlob.size() == MSpecConstsBlob.size() &&
"Specialization constant default value blob do not have the "
"expected size.");
std::uninitialized_copy(&DefValDescriptors[8],
&DefValDescriptors[8] + MSpecConstsBlob.size(),
std::uninitialized_copy(MSpecConstsDefValBlob.begin(),
MSpecConstsDefValBlob.begin() +
MSpecConstsBlob.size(),
MSpecConstsBlob.data());
}
}
Expand All @@ -372,6 +403,9 @@ class device_image_impl {
// Binary blob which can have values of all specialization constants in the
// image
std::vector<unsigned char> MSpecConstsBlob;
// Binary blob which can have default values of all specialization constants
// in the image.
const ByteArray MSpecConstsDefValBlob;
// Buffer containing binary blob which can have values of all specialization
// constants in the image, it is using for storing non-native specialization
// constants
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,7 @@ Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG(
for (auto Ev = Events.begin(); Ev != Events.end();) {
auto *EvDepCmd = static_cast<Command *>((*Ev)->getCommand());
if (!EvDepCmd) {
++Ev;
continue;
}
// Handle event dependencies on any commands part of another active
Expand Down
16 changes: 12 additions & 4 deletions sycl/test-e2e/ESIMD/unified_memory_api/Inputs/atomic_update.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,10 +715,13 @@ bool test_int_types_and_sizes(queue q, const Config &cfg) {
q, cfg);
passed &= test_int_types<64, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(
q, cfg);
passed &= test_int_types<12, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(
q, cfg);
passed &= test_int_types<33, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(
q, cfg);
// non power of two values are supported only in newer driver.
// TODO: Enable this when the new driver reaches test infrastructure
// (v27556).
#if 0
passed &= test_int_types<12, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(q, cfg);
passed &= test_int_types<33, Op, UseMask, UseLSCFeatures, UseAcc, SignMask>(q, cfg);
#endif
}

return passed;
Expand All @@ -739,8 +742,13 @@ bool test_fp_types_and_sizes(queue q, const Config &cfg) {
passed &= test_fp_types<32, Op, UseMask, UseLSCFeatures, UseAcc>(q, cfg);
passed &= test_fp_types<64, Op, UseMask, UseLSCFeatures, UseAcc>(q, cfg);

// non power of two values are supported only in newer driver.
// TODO: Enable this when the new driver reaches test infrastructure
// (v27556).
#if 0
passed &= test_fp_types<12, Op, UseMask, UseLSCFeatures, UseAcc>(q, cfg);
passed &= test_fp_types<35, Op, UseMask, UseLSCFeatures, UseAcc>(q, cfg);
#endif
}
return passed;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -612,9 +612,14 @@ bool test_fp_types(queue q) {

if constexpr (Features == TestFeatures::DG2 ||
Features == TestFeatures::PVC) {
if (q.get_device().has(sycl::aspect::atomic64) &&
q.get_device().has(sycl::aspect::fp64)) {
passed &= run_test<UseAcc, double, N, Op, UseMask>(q);
// TODO: fmin/fmax/fcmpxchg for double requires a newer GPU driver.
if constexpr (!std::is_same_v<Op<double, N>, ImplLSCFmax<double, N>> &&
!std::is_same_v<Op<double, N>, ImplLSCFmin<double, N>> &&
!std::is_same_v<Op<double, N>, ImplLSCFcmpwr<double, N>>) {
if (q.get_device().has(sycl::aspect::atomic64) &&
q.get_device().has(sycl::aspect::fp64)) {
passed &= run_test<UseAcc, double, N, Op, UseMask>(q);
}
}
}
return passed;
Expand All @@ -628,6 +633,7 @@ bool test_int_types_and_sizes(queue q) {
passed &= test_int_types<2, Op, UseMask, Features, UseAcc, SignMask>(q);
passed &= test_int_types<4, Op, UseMask, Features, UseAcc, SignMask>(q);
passed &= test_int_types<8, Op, UseMask, Features, UseAcc, SignMask>(q);
// TODO: N=16 and N=32 does not pass on Gen12 with mask due to older driver.
if (UseMask && Features == TestFeatures::Generic &&
esimd_test::isGPUDriverGE(q, esimd_test::GPUDriverOS::LinuxAndWindows,
"26918", "101.4953", false)) {
Expand All @@ -639,8 +645,13 @@ bool test_int_types_and_sizes(queue q) {
if constexpr (Features == TestFeatures::DG2 ||
Features == TestFeatures::PVC) {
passed &= test_int_types<64, Op, UseMask, Features, UseAcc, SignMask>(q);
// non power of two values are supported only in newer driver.
// TODO: Enable this when the new driver reaches test infrastructure
// (v27556).
#if 0
passed &= test_int_types<12, Op, UseMask, Features, UseAcc, SignMask>(q);
passed &= test_int_types<33, Op, UseMask, Features, UseAcc, SignMask>(q);
#endif
}

return passed;
Expand All @@ -661,8 +672,13 @@ bool test_fp_types_and_sizes(queue q) {
if constexpr (Features == TestFeatures::DG2 ||
Features == TestFeatures::PVC) {
passed &= test_fp_types<64, Op, UseMask, Features, UseAcc>(q);
// non power of two values are supported only in newer driver.
// TODO: Enable this when the new driver reaches test infrastructure
// (v27556).
#if 0
passed &= test_fp_types<33, Op, UseMask, Features, UseAcc>(q);
passed &= test_fp_types<65, Op, UseMask, Features, UseAcc>(q);
#endif
}
return passed;
}
Expand Down
Loading

0 comments on commit f1444d8

Please sign in to comment.