Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Use std::array as storage for sycl::vec on device #14130

Merged
merged 22 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0aa7a9a
Add copy constructor
uditagarwal97 Apr 18, 2024
f2a1dc2
Merge branch 'sycl' of https://github.com/uditagarwal97/llvm into sycl
uditagarwal97 May 28, 2024
15f3094
Refactor vector/byte.cpp E2E test
uditagarwal97 May 29, 2024
5341760
Restrict vec and swizzle opperations to types mentioned in the SPEC
uditagarwal97 May 29, 2024
0c2dd78
Merge branch 'sycl' into vec_refac_2
uditagarwal97 May 30, 2024
b75b835
Update byte.cpp
uditagarwal97 May 30, 2024
e5fe421
Add asserts for byte.cpp E2E
uditagarwal97 May 30, 2024
fd27010
Address reviews; Fix formatting
uditagarwal97 May 30, 2024
23475a0
Seperate out math operators in a class.
uditagarwal97 Jun 4, 2024
b739c08
Fix formatting; Address reviews
uditagarwal97 Jun 4, 2024
19aa68d
Expose getters and setters to BINOPS via a class in ::detail
uditagarwal97 Jun 6, 2024
0d74886
Address reviews
uditagarwal97 Jun 7, 2024
3b34c2c
Minor fixes
uditagarwal97 Jun 7, 2024
73c01b2
Merge branch 'sycl' into vec_refac_3
uditagarwal97 Jun 7, 2024
94216e5
Address reviews
uditagarwal97 Jun 10, 2024
d0f6e0c
Add link to alignment requirement of sycl::vec
uditagarwal97 Jun 10, 2024
b0d61ec
Use std::array as storage for sycl::vec on device.
uditagarwal97 Jun 10, 2024
5c06f37
Merge branch 'sycl' into vec_refac_4
uditagarwal97 Jun 10, 2024
b8aef5f
Fix formatting
uditagarwal97 Jun 10, 2024
470987f
Fix bug
uditagarwal97 Jun 10, 2024
ee58931
Address reviews; Remove setVaue().
uditagarwal97 Jun 12, 2024
84aca9f
Restrict device code test to linux and fix formatting issues.
uditagarwal97 Jun 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions sycl/include/sycl/detail/generic_type_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,8 @@ template <typename T> auto convertToOpenCLType(T &&x) {
std::declval<ElemTy>()))>,
no_ref::size()>;
#ifdef __SYCL_DEVICE_ONLY__

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// TODO: for some mysterious reasons on NonUniformGroups E2E tests fail if
// we use the "else" version only. I suspect that's an issues with
// non-uniform groups implementation.
Expand All @@ -350,6 +352,10 @@ template <typename T> auto convertToOpenCLType(T &&x) {
else
return static_cast<typename MatchingVec::vector_t>(
x.template as<MatchingVec>());
#else // __INTEL_PREVIEW_BREAKING_CHANGES
return sycl::bit_cast<typename MatchingVec::vector_t>(x);
#endif // __INTEL_PREVIEW_BREAKING_CHANGES

#else
return x.template as<MatchingVec>();
#endif
Expand Down
170 changes: 78 additions & 92 deletions sycl/include/sycl/detail/vector_arith.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,14 @@ using rel_t = typename std::conditional_t<
friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \
const vec_t & Rhs) { \
vec_t Ret; \
if constexpr (vec_t::IsUsingArrayOnDevice) { \
if constexpr (vec_t::IsBfloat16) { \
for (size_t I = 0; I < NumElements; ++I) { \
detail::VecAccess<vec_t>::setValue( \
Ret, I, \
(detail::VecAccess<vec_t>::getValue(Lhs, I) \
BINOP detail::VecAccess<vec_t>::getValue(Rhs, I))); \
Ret[I] = Lhs[I] BINOP Rhs[I]; \
} \
} else { \
Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \
auto ExtVecLhs = sycl::bit_cast<typename vec_t::vector_t>(Lhs); \
auto ExtVecRhs = sycl::bit_cast<typename vec_t::vector_t>(Rhs); \
Ret = vec<DataT, NumElements>(ExtVecLhs BINOP ExtVecRhs); \
if constexpr (std::is_same_v<DataT, bool> && CONVERT) { \
vec_arith_common<bool, NumElements>::ConvertToDataT(Ret); \
} \
Expand All @@ -72,13 +71,9 @@ using rel_t = typename std::conditional_t<
friend std::enable_if_t<(COND), vec_t> operator BINOP(const vec_t & Lhs, \
const vec_t & Rhs) { \
vec_t Ret{}; \
for (size_t I = 0; I < NumElements; ++I) \
detail::VecAccess<vec_t>::setValue( \
Ret, I, \
(DataT)(vec_data<DataT>::get( \
detail::VecAccess<vec_t>::getValue(Lhs, I)) \
BINOP vec_data<DataT>::get( \
detail::VecAccess<vec_t>::getValue(Rhs, I)))); \
for (size_t I = 0; I < NumElements; ++I) { \
Ret[I] = Lhs[I] BINOP Rhs[I]; \
} \
return Ret; \
}
#endif // __SYCL_DEVICE_ONLY__
Expand Down Expand Up @@ -130,83 +125,78 @@ template <typename DataT, int NumElements>
class vec_arith : public vec_arith_common<DataT, NumElements> {
protected:
using vec_t = vec<DataT, NumElements>;
using ocl_t = rel_t<DataT>;
using ocl_t = detail::select_cl_scalar_integral_signed_t<DataT>;
template <typename T> using vec_data = vec_helper<T>;

// operator!.
friend vec<rel_t<DataT>, NumElements> operator!(const vec_t &Rhs) {
if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) {
vec_t Ret{};
friend vec<ocl_t, NumElements> operator!(const vec_t &Rhs) {
#ifdef __SYCL_DEVICE_ONLY__
if constexpr (!vec_t::IsBfloat16) {
auto extVec = sycl::bit_cast<typename vec_t::vector_t>(Rhs);
vec<ocl_t, NumElements> Ret{
(typename vec<ocl_t, NumElements>::vector_t) !extVec};
return Ret;
} else
#endif // __SYCL_DEVICE_ONLY__
{
vec<ocl_t, NumElements> Ret{};
for (size_t I = 0; I < NumElements; ++I) {
detail::VecAccess<vec_t>::setValue(
Ret, I,
!vec_data<DataT>::get(detail::VecAccess<vec_t>::getValue(Rhs, I)));
// static_cast will work here as the output of ! operator is either 0 or
// -1.
Ret[I] = static_cast<ocl_t>(-1 * (!Rhs[I]));
}
return Ret.template as<vec<rel_t<DataT>, NumElements>>();
} else {
return vec_t{(typename vec<DataT, NumElements>::DataType) !Rhs.m_Data}
.template as<vec<rel_t<DataT>, NumElements>>();
return Ret;
}
}

// operator +.
friend vec_t operator+(const vec_t &Lhs) {
if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) {
vec_t Ret{};
for (size_t I = 0; I < NumElements; ++I)
detail::VecAccess<vec_t>::setValue(
Ret, I,
vec_data<DataT>::get(+vec_data<DataT>::get(
detail::VecAccess<vec_t>::getValue(Lhs, I))));
return Ret;
} else {
return vec_t{+Lhs.m_Data};
}
#ifdef __SYCL_DEVICE_ONLY__
auto extVec = sycl::bit_cast<typename vec_t::vector_t>(Lhs);
return vec_t{+extVec};
#else
vec_t Ret{};
for (size_t I = 0; I < NumElements; ++I)
Ret[I] = +Lhs[I];
return Ret;
#endif
}

// operator -.
friend vec_t operator-(const vec_t &Lhs) {
namespace oneapi = sycl::ext::oneapi;
vec_t Ret{};
if constexpr (vec_t::IsBfloat16 && NumElements == 1) {
oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data);
oneapi::bfloat16 w = -v;
Ret.m_Data = oneapi::detail::bfloat16ToBits(w);
} else if constexpr (vec_t::IsBfloat16) {
for (size_t I = 0; I < NumElements; I++) {
oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data[I]);
oneapi::bfloat16 w = -v;
Ret.m_Data[I] = oneapi::detail::bfloat16ToBits(w);
}
} else if constexpr (vec_t::IsUsingArrayOnDevice ||
vec_t::IsUsingArrayOnHost) {
for (size_t I = 0; I < NumElements; ++I)
detail::VecAccess<vec_t>::setValue(
Ret, I,
vec_data<DataT>::get(-vec_data<DataT>::get(
detail::VecAccess<vec_t>::getValue(Lhs, I))));
return Ret;
if constexpr (vec_t::IsBfloat16) {
for (size_t I = 0; I < NumElements; I++)
Ret[I] = -Lhs[I];
} else {
Ret = vec_t{-Lhs.m_Data};
#ifndef __SYCL_DEVICE_ONLY__
for (size_t I = 0; I < NumElements; ++I)
Ret[I] = -Lhs[I];
#else
auto extVec = sycl::bit_cast<typename vec_t::vector_t>(Lhs);
Ret = vec_t{-extVec};
if constexpr (std::is_same_v<DataT, bool>) {
vec_arith_common<bool, NumElements>::ConvertToDataT(Ret);
}
return Ret;
#endif
}
return Ret;
}

// Unary operations on sycl::vec
// FIXME: Don't allow Unary operators on vec<bool> after
// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed.
#ifdef __SYCL_UOP
#error "Undefine __SYCL_UOP macro"
#endif
#define __SYCL_UOP(UOP, OPASSIGN) \
friend vec_t &operator UOP(vec_t & Rhs) { \
Rhs OPASSIGN vec_data<DataT>::get(1); \
Rhs OPASSIGN DataT{1}; \
return Rhs; \
} \
friend vec_t operator UOP(vec_t &Lhs, int) { \
vec_t Ret(Lhs); \
Lhs OPASSIGN vec_data<DataT>::get(1); \
Lhs OPASSIGN DataT{1}; \
return Ret; \
}

Expand All @@ -228,25 +218,24 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
friend std::enable_if_t<(COND), vec<ocl_t, NumElements>> operator RELLOGOP( \
const vec_t & Lhs, const vec_t & Rhs) { \
vec<ocl_t, NumElements> Ret{}; \
/* This special case is needed since there are no standard operator|| */ \
/* or operator&& functions for std::array. */ \
if constexpr (vec_t::IsUsingArrayOnDevice && \
(std::string_view(#RELLOGOP) == "||" || \
std::string_view(#RELLOGOP) == "&&")) { \
/* ext_vector_type does not support bfloat16, so for these */ \
/* we do element-by-element operation on the underlying std::array. */ \
if constexpr (vec_t::IsBfloat16) { \
for (size_t I = 0; I < NumElements; ++I) { \
/* We cannot use SetValue here as the operator is not a friend of*/ \
/* Ret on Windows. */ \
Ret[I] = static_cast<ocl_t>( \
-(vec_data<DataT>::get(detail::VecAccess<vec_t>::getValue(Lhs, I)) \
RELLOGOP vec_data<DataT>::get( \
detail::VecAccess<vec_t>::getValue(Rhs, I)))); \
Ret[I] = static_cast<ocl_t>(-(Lhs[I] RELLOGOP Rhs[I])); \
uditagarwal97 marked this conversation as resolved.
Show resolved Hide resolved
} \
} else { \
auto ExtVecLhs = sycl::bit_cast<typename vec_t::vector_t>(Lhs); \
auto ExtVecRhs = sycl::bit_cast<typename vec_t::vector_t>(Rhs); \
/* Cast required to convert unsigned char ext_vec_type to */ \
/* char ext_vec_type. */ \
Ret = vec<ocl_t, NumElements>( \
(typename vec<ocl_t, NumElements>::vector_t)( \
Lhs.m_Data RELLOGOP Rhs.m_Data)); \
if (NumElements == 1) /*Scalar 0/1 logic was applied, invert*/ \
ExtVecLhs RELLOGOP ExtVecRhs)); \
/* For NumElements == 1, we use scalar instead of ext_vector_type. */ \
if constexpr (NumElements == 1) { \
Ret *= -1; \
} \
} \
return Ret; \
}
Expand All @@ -257,12 +246,7 @@ class vec_arith : public vec_arith_common<DataT, NumElements> {
const vec_t & Lhs, const vec_t & Rhs) { \
vec<ocl_t, NumElements> Ret{}; \
for (size_t I = 0; I < NumElements; ++I) { \
/* We cannot use SetValue here as the operator is not a friend of*/ \
/* Ret on Windows. */ \
Ret[I] = static_cast<ocl_t>( \
-(vec_data<DataT>::get(detail::VecAccess<vec_t>::getValue(Lhs, I)) \
RELLOGOP vec_data<DataT>::get( \
detail::VecAccess<vec_t>::getValue(Rhs, I)))); \
Ret[I] = static_cast<ocl_t>(-(Lhs[I] RELLOGOP Rhs[I])); \
} \
return Ret; \
}
Expand Down Expand Up @@ -376,34 +360,36 @@ template <typename DataT, int NumElements> class vec_arith_common {
protected:
using vec_t = vec<DataT, NumElements>;

static constexpr bool IsBfloat16 =
std::is_same_v<DataT, sycl::ext::oneapi::bfloat16>;

// operator~() available only when: dataT != float && dataT != double
// && dataT != half
template <typename T = DataT>
friend std::enable_if_t<!detail::is_vgenfloat_v<T>, vec_t>
operator~(const vec_t &Rhs) {
if constexpr (vec_t::IsUsingArrayOnDevice || vec_t::IsUsingArrayOnHost) {
vec_t Ret{};
for (size_t I = 0; I < NumElements; ++I) {
detail::VecAccess<vec_t>::setValue(
Ret, I, ~detail::VecAccess<vec_t>::getValue(Rhs, I));
}
return Ret;
} else {
vec_t Ret{(typename vec_t::DataType) ~Rhs.m_Data};
if constexpr (std::is_same_v<DataT, bool>) {
vec_arith_common<bool, NumElements>::ConvertToDataT(Ret);
}
return Ret;
#ifdef __SYCL_DEVICE_ONLY__
auto extVec = sycl::bit_cast<typename vec_t::vector_t>(Rhs);
vec_t Ret{~extVec};
if constexpr (std::is_same_v<DataT, bool>) {
ConvertToDataT(Ret);
}
return Ret;
#else
vec_t Ret{};
for (size_t I = 0; I < NumElements; ++I) {
Ret[I] = ~Rhs[I];
}
return Ret;
#endif
}

#ifdef __SYCL_DEVICE_ONLY__
using vec_bool_t = vec<bool, NumElements>;
// Required only for std::bool.
static void ConvertToDataT(vec_bool_t &Ret) {
for (size_t I = 0; I < NumElements; ++I) {
DataT Tmp = detail::VecAccess<vec_bool_t>::getValue(Ret, I);
detail::VecAccess<vec_bool_t>::setValue(Ret, I, Tmp);
Ret[I] = bit_cast<int8_t>(Ret[I]) != 0;
}
}
#endif
Expand Down
9 changes: 9 additions & 0 deletions sycl/include/sycl/detail/vector_convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,15 @@ NativeToT convertImpl(NativeFromT Value) {
}
}

#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
template <typename FromT, typename ToT, sycl::rounding_mode RoundingMode,
int VecSize, typename NativeFromT, typename NativeToT>
auto ConvertImpl(std::byte val) {
return convertImpl<FromT, ToT, RoundingMode, VecSize, NativeFromT, NativeToT>(
(std::int8_t)val);
}
#endif

} // namespace detail
} // namespace _V1
} // namespace sycl
2 changes: 2 additions & 0 deletions sycl/include/sycl/ext/oneapi/bfloat16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ bfloat16 bitsToBfloat16(const Bfloat16StorageT Value);

// sycl::vec support
namespace bf16 {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
#ifdef __SYCL_DEVICE_ONLY__
using Vec2StorageT = Bfloat16StorageT __attribute__((ext_vector_type(2)));
using Vec3StorageT = Bfloat16StorageT __attribute__((ext_vector_type(3)));
Expand All @@ -45,6 +46,7 @@ using Vec4StorageT = std::array<Bfloat16StorageT, 4>;
using Vec8StorageT = std::array<Bfloat16StorageT, 8>;
using Vec16StorageT = std::array<Bfloat16StorageT, 16>;
#endif
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
} // namespace bf16
} // namespace detail

Expand Down
6 changes: 6 additions & 0 deletions sycl/include/sycl/half_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,18 +249,22 @@ using StorageT = _Float16;
using BIsRepresentationT = _Float16;
using VecElemT = _Float16;

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
using Vec2StorageT = VecElemT __attribute__((ext_vector_type(2)));
using Vec3StorageT = VecElemT __attribute__((ext_vector_type(3)));
using Vec4StorageT = VecElemT __attribute__((ext_vector_type(4)));
using Vec8StorageT = VecElemT __attribute__((ext_vector_type(8)));
using Vec16StorageT = VecElemT __attribute__((ext_vector_type(16)));
#endif // __INTEL_PREVIEW_BREAKING_CHANGES

#else // SYCL_DEVICE_ONLY
using StorageT = detail::host_half_impl::half;
// No need to extract underlying data type for built-in functions operating on
// host
using BIsRepresentationT = half;
using VecElemT = half;

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// On the host side we cannot use OpenCL cl_half# types as an underlying type
// for vec because they are actually defined as an integer type under the
// hood. As a result half values will be converted to the integer and passed
Expand All @@ -270,6 +274,8 @@ using Vec3StorageT = std::array<VecElemT, 3>;
using Vec4StorageT = std::array<VecElemT, 4>;
using Vec8StorageT = std::array<VecElemT, 8>;
using Vec16StorageT = std::array<VecElemT, 16>;
#endif // __INTEL_PREVIEW_BREAKING_CHANGES

#endif // SYCL_DEVICE_ONLY

#ifndef __SYCL_DEVICE_ONLY__
Expand Down
Loading
Loading