Skip to content

Commit

Permalink
[SYCL] Fix using some of math built-ins when ESIMD is included (#14793)
Browse files Browse the repository at this point in the history
ESIMD headers declare some of `__spirv_ocl_*` built-ins as template
functions, but those built-ins are also automatically declared by the
compiler implicitly when used.

On Windows, redeclarations in headers cause compilation issues, because
by some reason they take priority, but template arguments for them
couldn't be inferred.

This commit effectively introduces new tests to cover affected scenarios
and reverts a couple of ESIMD commits to fix the issue:
- #14020 is completely reverted
- #13383 is partially reverted to preserve new interfaces and
tests, but stop declaring `__spirv_ocl_*` built-ins

I suppose that both PRs were made in attempt to move away from custom
ESIMD intrinsic to standard SPIR-V ones, but that should be done without
manually declaring the latter. A bigger refactoring might be needed to
use auto-declared SPIR-V built-ins in ESIMD because of presence and
usage of single-element vectors in ESIMD (which do not exist in SPIR-V).
  • Loading branch information
AlexeySachkov authored Jul 29, 2024
1 parent 619185f commit 0228c23
Show file tree
Hide file tree
Showing 14 changed files with 163 additions and 51 deletions.
18 changes: 18 additions & 0 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,21 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI,
return NewI;
}

// Translates the following intrinsics:
// %res = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
// %res = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
// To
// %mul = fmul <type> %a, <type> %b
// %res = fadd <type> %mul, <type> %c
// TODO: Remove when newer GPU driver is used in CI.
void translateFmuladd(CallInst *CI) {
assert(CI->getIntrinsicID() == Intrinsic::fmuladd);
IRBuilder<> Bld(CI);
auto *Mul = Bld.CreateFMul(CI->getOperand(0), CI->getOperand(1));
auto *Res = Bld.CreateFAdd(Mul, CI->getOperand(2));
CI->replaceAllUsesWith(Res);
}

// Translates an LLVM intrinsic to a form, digestable by the BE.
bool translateLLVMIntrinsic(CallInst *CI) {
Function *F = CI->getCalledFunction();
Expand All @@ -1245,6 +1260,9 @@ bool translateLLVMIntrinsic(CallInst *CI) {
// no translation - it will be simply removed.
// TODO: make use of 'assume' info in the BE
break;
case Intrinsic::fmuladd:
translateFmuladd(CI);
break;
default:
return false; // "intrinsic wasn't translated, keep the original call"
}
Expand Down
14 changes: 8 additions & 6 deletions llvm/test/SYCLLowerIR/ESIMD/lower_llvm_intrin.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s

; This test checks that LowerESIMD pass does not lower some llvm intrinsics
; which can now be handled by the VC BE.
; This test checks that LowerESIMD pass correctly lowers some llvm intrinsics
; which can't be handled by the VC BE.
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

Expand All @@ -10,15 +10,17 @@ declare double @llvm.fmuladd.f64(double %x, double %y, double %z)

define spir_func float @test_fmuladd_f32(float %x, float %y, float %z) {
%1 = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul float %x, %y
; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd float %[[A]], %z
ret float %1
; CHECK: ret float %[[A]]
; CHECK: ret float %[[B]]
}

define spir_func double @test_fmuladd_f64(double %x, double %y, double %z) {
%1 = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul double %x, %y
; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd double %[[A]], %z
ret double %1
; CHECK: ret double %[[A]]
; CHECK: ret double %[[B]]
}

33 changes: 14 additions & 19 deletions sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,6 @@ template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_native_powr(__ESIMD_raw_vec_t(T, N), __ESIMD_raw_vec_t(T, N));

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fabs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_s_abs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fmin(__ESIMD_raw_vec_t(T, N),
__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fmax(__ESIMD_raw_vec_t(T, N),
__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
// saturation intrinsics
template <typename T0, typename T1, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
Expand Down Expand Up @@ -118,7 +101,15 @@ template <typename T0, typename T1, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
__esimd_sstrunc_sat(__ESIMD_raw_vec_t(T1, SZ) src) __ESIMD_INTRIN_END;

/// 3 kinds of max, the missing fmax uses spir-v intrinsics above
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_abs(__ESIMD_raw_vec_t(T, SZ) src0) __ESIMD_INTRIN_END;

/// 3 kinds of max
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_fmax(__ESIMD_raw_vec_t(T, SZ) src0,
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_umax(__ESIMD_raw_vec_t(T, SZ) src0,
Expand All @@ -128,7 +119,11 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_smax(__ESIMD_raw_vec_t(T, SZ) src0,
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;

/// 3 kinds of min, the missing fmin uses spir-v instrinsics above
/// 3 kinds of min
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_fmin(__ESIMD_raw_vec_t(T, SZ) src0,
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
template <typename T, int SZ>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
__esimd_umin(__ESIMD_raw_vec_t(T, SZ) src0,
Expand Down
14 changes: 5 additions & 9 deletions sycl/include/sycl/ext/intel/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,7 @@ namespace detail {
template <typename TRes, typename TArg, int SZ>
ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
__esimd_abs_common_internal(simd<TArg, SZ> src0) {
simd<TArg, SZ> Result;
if constexpr (detail::is_generic_floating_point_v<TArg>)
Result = simd<TArg, SZ>(__spirv_ocl_fabs<TArg, SZ>(src0.data()));
else
Result = simd<TArg, SZ>(__spirv_ocl_s_abs<TArg, SZ>(src0.data()));
simd<TArg, SZ> Result = simd<TArg, SZ>(__esimd_abs<TArg, SZ>(src0.data()));
return convert<TRes>(Result);
}

Expand Down Expand Up @@ -185,7 +181,7 @@ __ESIMD_API simd<T, SZ>(max)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;

if constexpr (std::is_floating_point<T>::value) {
auto Result = __spirv_ocl_fmax<T, SZ>(src0.data(), src1.data());
auto Result = __esimd_fmax<T, SZ>(src0.data(), src1.data());
if constexpr (is_sat)
Result = __esimd_sat<T, T, SZ>(Result);
return simd<T, SZ>(Result);
Expand Down Expand Up @@ -270,7 +266,7 @@ __ESIMD_API simd<T, SZ>(min)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;

if constexpr (std::is_floating_point<T>::value) {
auto Result = __spirv_ocl_fmin<T, SZ>(src0.data(), src1.data());
auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
if constexpr (is_sat)
Result = __esimd_sat<T, T, SZ>(Result);
return simd<T, SZ>(Result);
Expand Down Expand Up @@ -1466,7 +1462,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
template <typename... T>
simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
if constexpr (std::is_floating_point<T1>::value) {
return __spirv_ocl_fmax<T1, SZ>(v1.data(), v2.data());
return __esimd_fmax<T1, SZ>(v1.data(), v2.data());
} else if constexpr (std::is_unsigned<T1>::value) {
return __esimd_umax<T1, SZ>(v1.data(), v2.data());
} else {
Expand All @@ -1479,7 +1475,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
template <typename... T>
simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
if constexpr (std::is_floating_point<T1>::value) {
return __spirv_ocl_fmin<T1, SZ>(v1.data(), v2.data());
return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
} else if constexpr (std::is_unsigned<T1>::value) {
return __esimd_umin<T1, SZ>(v1.data(), v2.data());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,19 +112,8 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(sycl::half, N)

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_fma(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
__ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;
template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_popcount(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_ctz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__spirv_ocl_clz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;
__esimd_fmadd(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
__ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;

#undef __ESIMD_raw_vec_t
#undef __ESIMD_cpp_vec_t
Expand Down
20 changes: 16 additions & 4 deletions sycl/include/sycl/ext/intel/experimental/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
popcount(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_popcount<T, N>(vec.data());
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_ocl_popcount(vec.data());
#else
return vec;
#endif
}

/// Count the number of leading zeros.
Expand All @@ -44,7 +48,11 @@ template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
clz(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_clz<T, N>(vec.data());
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_ocl_clz(vec.data());
#else
return vec;
#endif
}

/// Count the number of trailing zeros.
Expand All @@ -55,7 +63,11 @@ template <typename T, int N>
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
__ESIMD_NS::simd<T, N>>
ctz(__ESIMD_NS::simd<T, N> vec) {
return __spirv_ocl_ctz<T, N>(vec.data());
#ifdef __SYCL_DEVICE_ONLY__
return __spirv_ocl_ctz(vec.data());
#else
return vec;
#endif
}

/// @} sycl_esimd_bitmanip
Expand Down Expand Up @@ -740,7 +752,7 @@ ESIMD_INLINE __ESIMD_NS::simd<T, N> fma(__ESIMD_NS::simd<T, N> a,
static_assert(__ESIMD_DNS::is_generic_floating_point_v<T>,
"fma only supports floating point types");
using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
auto Ret = __spirv_ocl_fma<__ESIMD_DNS::__raw_t<CppT>, N>(
auto Ret = __esimd_fmadd<__ESIMD_DNS::__raw_t<CppT>, N>(
__ESIMD_DNS::convert_vector<CppT, T, N>(a.data()),
__ESIMD_DNS::convert_vector<CppT, T, N>(b.data()),
__ESIMD_DNS::convert_vector<CppT, T, N>(c.data()));
Expand Down
11 changes: 11 additions & 0 deletions sycl/test/regression/esimd/abs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<int, 8> call_abs_vec(sycl::vec<int, 8> input) {
return sycl::abs(input);
}

SYCL_EXTERNAL int call_abs_scalar(int input) { return sycl::abs(input); }
11 changes: 11 additions & 0 deletions sycl/test/regression/esimd/clz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<int, 8> call_clz_vec(sycl::vec<int, 8> input) {
return sycl::clz(input);
}

SYCL_EXTERNAL int call_clz_scalar(int input) { return sycl::clz(input); }
11 changes: 11 additions & 0 deletions sycl/test/regression/esimd/ctz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<int, 8> call_ctz_vec(sycl::vec<int, 8> input) {
return sycl::ctz(input);
}

SYCL_EXTERNAL int call_ctz_scalar(int input) { return sycl::ctz(input); }
11 changes: 11 additions & 0 deletions sycl/test/regression/esimd/fabs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<float, 8> call_fabs_vec(sycl::vec<float, 8> input) {
return sycl::fabs(input);
}

SYCL_EXTERNAL float call_fabs_scalar(float input) { return sycl::fabs(input); }
15 changes: 15 additions & 0 deletions sycl/test/regression/esimd/fma.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<float, 8> call_fma_vec(sycl::vec<float, 8> a,
sycl::vec<float, 8> b,
sycl::vec<float, 8> c) {
return sycl::fma(a, b, c);
}

SYCL_EXTERNAL float call_fma_scalar(float a, float b, float c) {
return sycl::fma(a, b, c);
}
14 changes: 14 additions & 0 deletions sycl/test/regression/esimd/fmax.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<float, 8> call_fmax_vec(sycl::vec<float, 8> a,
sycl::vec<float, 8> b) {
return sycl::fmax(a, b);
}

SYCL_EXTERNAL float call_fmax_scalar(float a, float b) {
return sycl::fmax(a, b);
}
14 changes: 14 additions & 0 deletions sycl/test/regression/esimd/fmin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<float, 8> call_fmin_vec(sycl::vec<float, 8> a,
sycl::vec<float, 8> b) {
return sycl::fmin(a, b);
}

SYCL_EXTERNAL float call_fmin_scalar(float a, float b) {
return sycl::fmin(a, b);
}
13 changes: 13 additions & 0 deletions sycl/test/regression/esimd/popcount.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %clangxx -fsycl -fsyntax-only %s

#include <sycl/sycl.hpp>

#include <sycl/ext/intel/esimd.hpp>

SYCL_EXTERNAL sycl::vec<int, 8> call_popcount_vec(sycl::vec<int, 8> input) {
return sycl::popcount(input);
}

SYCL_EXTERNAL int call_popcount_scalar(int input) {
return sycl::popcount(input);
}

0 comments on commit 0228c23

Please sign in to comment.