diff --git a/sycl/include/sycl/detail/builtins.hpp b/sycl/include/sycl/detail/builtins.hpp index d0d76cc1c6925..99fc593156d06 100644 --- a/sycl/include/sycl/detail/builtins.hpp +++ b/sycl/include/sycl/detail/builtins.hpp @@ -9,7 +9,7 @@ #pragma once #include // for __SYCL_ALWAYS_INLINE -#include // for convertDataToType +#include // for to/from OpenCLType converts #include // TODO Decide whether to mark functions with this attribute. @@ -18,27 +18,31 @@ #ifdef __SYCL_DEVICE_ONLY__ #define __FUNC_PREFIX_OCL __spirv_ocl_ #define __FUNC_PREFIX_CORE __spirv_ -#define __SYCL_EXTERN_IT1(Ret, prefix, call, arg1) -#define __SYCL_EXTERN_IT2(Ret, prefix, call, arg1, arg2) -#define __SYCL_EXTERN_IT2_SAME(Ret, prefix, call, arg) -#define __SYCL_EXTERN_IT3(Ret, prefix, call, Arg1, Arg2, Arg3) +#define __SYCL_EXTERN_IT1(R, prefix, call, arg1) +#define __SYCL_EXTERN_IT2(R, prefix, call, arg1, arg2) +#define __SYCL_EXTERN_IT2_SAME(R, prefix, call, arg) +#define __SYCL_EXTERN_IT3(R, prefix, call, Arg1, Arg2, Arg3) #else #define __FUNC_PREFIX_OCL sycl_host_ #define __FUNC_PREFIX_CORE sycl_host_ -#define __SYCL_EXTERN_IT1(Ret, prefix, call, arg) \ +#define __SYCL_EXTERN_IT1(R, prefix, call, arg) \ using Arg = decltype(arg); \ + using Ret = sycl::detail::ConvertToOpenCLType_t; \ extern Ret __SYCL_PPCAT(prefix, call)(Arg) -#define __SYCL_EXTERN_IT2_SAME(Ret, prefix, call, arg) \ +#define __SYCL_EXTERN_IT2_SAME(R, prefix, call, arg) \ using Arg = decltype(arg); \ + using Ret = sycl::detail::ConvertToOpenCLType_t; \ extern Ret __SYCL_PPCAT(prefix, call)(Arg, Arg) -#define __SYCL_EXTERN_IT2(Ret, prefix, call, arg1, arg2) \ +#define __SYCL_EXTERN_IT2(R, prefix, call, arg1, arg2) \ using Arg1 = decltype(arg1); \ using Arg2 = decltype(arg2); \ + using Ret = sycl::detail::ConvertToOpenCLType_t; \ extern Ret __SYCL_PPCAT(prefix, call)(Arg1, Arg2) -#define __SYCL_EXTERN_IT3(Ret, prefix, call, arg1, arg2, arg3) \ +#define __SYCL_EXTERN_IT3(R, prefix, call, arg1, arg2, arg3) \ using Arg1 = decltype(arg1); \ using Arg2 = decltype(arg2); \ using Arg3 = decltype(arg3); \ + using Ret = sycl::detail::ConvertToOpenCLType_t; \ extern Ret __SYCL_PPCAT(prefix, call)(Arg1, Arg2, Arg3) #endif @@ -49,10 +53,9 @@ template \ inline __SYCL_ALWAYS_INLINE R __invoke_##call(T1 t1) __NOEXC { \ auto arg1 = sycl::detail::convertToOpenCLType(std::move(t1)); \ - using Ret = sycl::detail::ConvertToOpenCLType_t; \ - __SYCL_EXTERN_IT1(Ret, prefix, call, arg1); \ - Ret ret = __SYCL_PPCAT(prefix, call)(std::move(arg1)); \ - return sycl::detail::convertDataToType(std::move(ret)); \ + __SYCL_EXTERN_IT1(R, prefix, call, arg1); \ + return sycl::detail::convertFromOpenCLTypeFor( \ + __SYCL_PPCAT(prefix, call)(std::move(arg1))); \ } #define __SYCL_MAKE_CALL_ARG2(call, prefix) \ @@ -60,10 +63,9 @@ inline __SYCL_ALWAYS_INLINE R __invoke_##call(T1 t1, T2 t2) __NOEXC { \ auto arg1 = sycl::detail::convertToOpenCLType(std::move(t1)); \ auto arg2 = sycl::detail::convertToOpenCLType(std::move(t2)); \ - using Ret = sycl::detail::ConvertToOpenCLType_t; \ - __SYCL_EXTERN_IT2(Ret, prefix, call, arg1, arg2); \ - Ret ret = __SYCL_PPCAT(prefix, call)(std::move(arg1), std::move(arg2)); \ - return sycl::detail::convertDataToType(std::move(ret)); \ + __SYCL_EXTERN_IT2(R, prefix, call, arg1, arg2); \ + return sycl::detail::convertFromOpenCLTypeFor( \ + __SYCL_PPCAT(prefix, call)(std::move(arg1), std::move(arg2))); \ } #define __SYCL_MAKE_CALL_ARG2_SAME(call, prefix) \ @@ -71,10 +73,9 @@ inline __SYCL_ALWAYS_INLINE R __invoke_##call(T t1, T t2) __NOEXC { \ auto arg1 = sycl::detail::convertToOpenCLType(std::move(t1)); \ auto arg2 = sycl::detail::convertToOpenCLType(std::move(t2)); \ - using Ret = sycl::detail::ConvertToOpenCLType_t; \ - __SYCL_EXTERN_IT2_SAME(Ret, prefix, call, arg1); \ - Ret ret = __SYCL_PPCAT(prefix, call)(std::move(arg1), std::move(arg2)); \ - return sycl::detail::convertDataToType(std::move(ret)); \ + __SYCL_EXTERN_IT2_SAME(R, prefix, call, arg1); \ + return sycl::detail::convertFromOpenCLTypeFor( \ + __SYCL_PPCAT(prefix, call)(std::move(arg1), std::move(arg2))); \ } #define __SYCL_MAKE_CALL_ARG2_SAME_RESULT(call, prefix) \ @@ -82,10 +83,9 @@ inline __SYCL_ALWAYS_INLINE T __invoke_##call(T v1, T v2) __NOEXC { \ auto arg1 = sycl::detail::convertToOpenCLType(std::move(v1)); \ auto arg2 = sycl::detail::convertToOpenCLType(std::move(v2)); \ - using Type = decltype(arg1); \ - __SYCL_EXTERN_IT2_SAME(Type, prefix, call, arg1); \ - Type ret = __SYCL_PPCAT(prefix, call)(std::move(arg1), std::move(arg2)); \ - return sycl::detail::convertDataToType(std::move(ret)); \ + __SYCL_EXTERN_IT2_SAME(T, prefix, call, arg1); \ + return sycl::detail::convertFromOpenCLTypeFor( \ + __SYCL_PPCAT(prefix, call)(std::move(arg1), std::move(arg2))); \ } #define __SYCL_MAKE_CALL_ARG3(call, prefix) \ @@ -94,11 +94,9 @@ auto arg1 = sycl::detail::convertToOpenCLType(std::move(t1)); \ auto arg2 = sycl::detail::convertToOpenCLType(std::move(t2)); \ auto arg3 = sycl::detail::convertToOpenCLType(std::move(t3)); \ - using Ret = sycl::detail::ConvertToOpenCLType_t; \ - __SYCL_EXTERN_IT3(Ret, prefix, call, arg1, arg2, arg3); \ - Ret ret = __SYCL_PPCAT(prefix, call)(std::move(arg1), std::move(arg2), \ - std::move(arg3)); \ - return sycl::detail::convertDataToType(std::move(ret)); \ + __SYCL_EXTERN_IT3(R, prefix, call, arg1, arg2, arg3); \ + return sycl::detail::convertFromOpenCLTypeFor(__SYCL_PPCAT( \ + prefix, call)(std::move(arg1), std::move(arg2), std::move(arg3))); \ } #ifndef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/detail/generic_type_traits.hpp b/sycl/include/sycl/detail/generic_type_traits.hpp index ea36fcf7cd669..7cf893778394c 100644 --- a/sycl/include/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/sycl/detail/generic_type_traits.hpp @@ -704,6 +704,18 @@ template auto convertToOpenCLType(T &&x) { return convertDataToType(std::forward(x)); } +template auto convertFromOpenCLTypeFor(From &&x) { + if constexpr (std::is_same_v && + std::is_same_v, bool>) { + // FIXME: Something seems to be wrong elsewhere... + return x; + } else { + static_assert(std::is_same_v, + ConvertToOpenCLType_t>); + return convertDataToType(std::forward(x)); + } +} + // Used for all, any and select relational built-in functions template inline constexpr T msbMask(T) { using UT = make_unsigned_t; diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index 57f17a33bd13f..95de2eabd1042 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -47,9 +47,8 @@ template struct sampled_opencl_image_type; #define __SYCL_INVOKE_SPIRV_CALL_ARG1(call) \ template inline R __invoke_##call(T1 ParT1) { \ using Ret = sycl::detail::ConvertToOpenCLType_t; \ - T1 Arg1 = ParT1; \ - Ret RetVar = __spirv_##call(Arg1); \ - return sycl::detail::convertDataToType(RetVar); \ + return sycl::detail::convertFromOpenCLTypeFor( \ + __spirv_##call(ParT1)); \ } // The macro defines the function __invoke_ImageXXXX, @@ -74,12 +73,10 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) { // Convert from sycl types to builtin types to get correct function mangling. using TempRetT = sycl::detail::ConvertToOpenCLType_t; - auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - TempRetT Ret = - __spirv_ImageRead(Img, TmpCoords); - return sycl::detail::convertDataToType(Ret); + return sycl::detail::convertFromOpenCLTypeFor( + __spirv_ImageRead(Img, TmpCoords)); } template @@ -98,10 +95,9 @@ static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, // Sampled Image must be an object whose type is OpTypeSampledImage // Image Operands encodes what operands follow. Either Lod // or Grad image operands must be present - TempRetT Ret = + return sycl::detail::convertFromOpenCLTypeFor( __spirv_ImageSampleExplicitLod( - SmpImg, TmpCoords, ImageOperands::Lod, Level); - return sycl::detail::convertDataToType(Ret); + SmpImg, TmpCoords, ImageOperands::Lod, Level)); } template @@ -122,10 +118,9 @@ static RetType __invoke__ImageReadGrad(SmpImageT SmpImg, CoordT Coords, // Sampled Image must be an object whose type is OpTypeSampledImage // Image Operands encodes what operands follow. Either Lod // or Grad image operands must be present - TempRetT Ret = + return sycl::detail::convertFromOpenCLTypeFor( __spirv_ImageSampleExplicitLod( - SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY); - return sycl::detail::convertDataToType(Ret); + SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY)); } template @@ -150,11 +145,10 @@ static RetType __invoke__ImageReadSampler(ImageT Img, CoordT Coords, enum ImageOperands { Lod = 0x2 }; // Lod value is zero as mipmap is not supported. - TempRetT Ret = + return sycl::detail::convertFromOpenCLTypeFor( __spirv_ImageSampleExplicitLod( __spirv_SampledImage(Img, Smpl), TmpCoords, - ImageOperands::Lod, 0.0f); - return sycl::detail::convertDataToType(Ret); + ImageOperands::Lod, 0.0f)); } namespace sycl { diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 3054e661b4a32..26ff67e54a2a8 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -95,8 +95,7 @@ inline __SYCL_ALWAYS_INLINE std::enable_if_t< sycl::detail::is_svgenfloatf_v || sycl::detail::is_svgenfloath_v, T> tanh(T x) __NOEXC { #if defined(__NVPTX__) - using _ocl_T = sycl::detail::ConvertToOpenCLType_t; - return sycl::detail::convertDataToType<_ocl_T, T>( + return sycl::detail::convertFromOpenCLTypeFor( __clc_native_tanh(sycl::detail::convertToOpenCLType(x))); #else return __sycl_std::__invoke_tanh(x); @@ -145,8 +144,7 @@ inline __SYCL_ALWAYS_INLINE std::enable_if_t, T> exp2(T x) __NOEXC { #if defined(__NVPTX__) - using _ocl_T = sycl::detail::ConvertToOpenCLType_t; - return sycl::detail::convertDataToType<_ocl_T, T>( + return sycl::detail::convertFromOpenCLTypeFor( __clc_native_exp2(sycl::detail::convertToOpenCLType(x))); #else return __sycl_std::__invoke_exp2(x);