From 3f28fe548749a0b2444cb2cbc31ebdd6d26780d1 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Fri, 5 Jul 2024 03:42:23 +0100 Subject: [PATCH 01/14] WIP: Add support for lround --- libclc/amdgcn-amdhsa/libspirv/SOURCES | 1 + libclc/amdgcn-amdhsa/libspirv/math/lround.cl | 30 +++++++++++++++++++ libclc/generic/lib/SOURCES | 1 + libclc/generic/lib/math/lround.cl | 8 +++++ libclc/generic/libspirv/SOURCES | 1 + libclc/generic/libspirv/math/lround.cl | 19 ++++++++++++ libclc/ptx-nvidiacl/include/libdevice.h | 2 ++ libclc/ptx-nvidiacl/libspirv/SOURCES | 1 + libclc/ptx-nvidiacl/libspirv/math/lround.cl | 17 +++++++++++ libdevice/cmath_wrapper.cpp | 3 ++ libdevice/cmath_wrapper_fp64.cpp | 3 ++ libdevice/device_math.h | 6 ++++ libdevice/fallback-cmath-fp64.cpp | 3 ++ libdevice/fallback-cmath.cpp | 3 ++ .../sycl/detail/builtins/math_functions.inc | 1 + sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp | 5 ++-- sycl/test-e2e/DeviceLib/cmath_test.cpp | 5 ++-- 17 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/lround.cl create mode 100644 libclc/generic/lib/math/lround.cl create mode 100644 libclc/generic/libspirv/math/lround.cl create mode 100644 libclc/ptx-nvidiacl/libspirv/math/lround.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES index 2d334ee4e326e..0681454a03ae8 100644 --- a/libclc/amdgcn-amdhsa/libspirv/SOURCES +++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES @@ -50,6 +50,7 @@ math/log2.cl math/log10.cl math/log1p.cl math/logb.cl +math/lround.cl math/modf.cl math/nextafter.cl math/pow.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/math/lround.cl b/libclc/amdgcn-amdhsa/libspirv/math/lround.cl new file mode 100644 index 0000000000000..e936c5d35f610 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/lround.cl @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// + + #include + #include + + #define __CLC_FUNCTION __spirv_ocl_lround + #define __CLC_BUILTIN __ocml_lround + + float __ocml_lround_f32(float); + #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) + + #ifdef cl_khr_fp64 + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + double __ocml_lround_f64(double); + #define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) + #endif // cl_khr_fp64 + + #ifdef cl_khr_fp16 + #pragma OPENCL EXTENSION cl_khr_fp16 : enable + half __ocml_lround_f16(half); + #define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16) + #endif // cl_khr_fp16 + + #include diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 0a7558459be24..a9e618393a38e 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -132,6 +132,7 @@ math/log10.cl math/log1p.cl math/log2.cl math/logb.cl +math/lround.cl math/mad.cl math/maxmag.cl math/minmag.cl diff --git a/libclc/generic/lib/math/lround.cl b/libclc/generic/lib/math/lround.cl new file mode 100644 index 0000000000000..c840a945abd56 --- /dev/null +++ b/libclc/generic/lib/math/lround.cl @@ -0,0 +1,8 @@ +#include + #include + + #include + + #define __CLC_BUILTIN __spirv_ocl_lround + #define __CLC_FUNCTION lround + #include diff --git a/libclc/generic/libspirv/SOURCES b/libclc/generic/libspirv/SOURCES index a222a1f7281a3..6af573fd6bd63 100644 --- a/libclc/generic/libspirv/SOURCES +++ b/libclc/generic/libspirv/SOURCES @@ -143,6 +143,7 @@ math/log10.cl math/log1p.cl math/log2.cl math/logb.cl +math/lround.cl math/mad.cl math/maxmag.cl math/minmag.cl diff --git a/libclc/generic/libspirv/math/lround.cl b/libclc/generic/libspirv/math/lround.cl new file mode 100644 index 0000000000000..6289dd022ee5f --- /dev/null +++ b/libclc/generic/libspirv/math/lround.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// + + #include + #include + + // Map the llvm intrinsic to an OpenCL function. + #define __CLC_FUNCTION __clc___spirv_ocl_lround + #define __CLC_INTRINSIC "llvm.lround" + #include + + #undef __CLC_FUNCTION + #define __CLC_FUNCTION __spirv_ocl_lround + #include diff --git a/libclc/ptx-nvidiacl/include/libdevice.h b/libclc/ptx-nvidiacl/include/libdevice.h index 5b1c8f6e9bee4..55924942e81aa 100644 --- a/libclc/ptx-nvidiacl/include/libdevice.h +++ b/libclc/ptx-nvidiacl/include/libdevice.h @@ -260,6 +260,8 @@ double __nv_logb(double); float __nv_logbf(float); float __nv_logf(float); double __nv_longlong_as_double(long); +long int __nv_lround(double); +long int __nv_lroundf(float); int __nv_max(int, int); int __nv_min(int, int); double __nv_modf(double, double *); diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES index 569da16923c6b..3b528fccc48f2 100644 --- a/libclc/ptx-nvidiacl/libspirv/SOURCES +++ b/libclc/ptx-nvidiacl/libspirv/SOURCES @@ -42,6 +42,7 @@ math/log10.cl math/log1p.cl math/log2.cl math/logb.cl +math/lround.cl math/modf.cl math/native_cos.cl math/native_divide.cl diff --git a/libclc/ptx-nvidiacl/libspirv/math/lround.cl b/libclc/ptx-nvidiacl/libspirv/math/lround.cl new file mode 100644 index 0000000000000..b6240ae5dd0dd --- /dev/null +++ b/libclc/ptx-nvidiacl/libspirv/math/lround.cl @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// + + #include + + #include "../../include/libdevice.h" + #include + + #define __CLC_FUNCTION __spirv_ocl_lround + #define __CLC_BUILTIN __nv_lround + #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f) + #include diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index cf40373a90efb..8576dfb085a06 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -39,6 +39,9 @@ extern "C" SYCL_EXTERNAL float __devicelib_fminf(float, float); DEVICE_EXTERN_C_INLINE float fminf(float x, float y) { return __devicelib_fminf(x, y); } +DEVICE_EXTERN_C_INLINE +double lround(float x) { return __devicelib_lround(x); } + DEVICE_EXTERN_C_INLINE float truncf(float x) { return __devicelib_truncf(x); } diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index bfc1a122f0f18..5d2a8466375cf 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -54,6 +54,9 @@ double log(double x) { return __devicelib_log(x); } DEVICE_EXTERN_C_INLINE double round(double x) { return __devicelib_round(x); } +DEVICE_EXTERN_C_INLINE +double lround(double x) { return __devicelib_lround(x); } + DEVICE_EXTERN_C_INLINE double floor(double x) { return __devicelib_floor(x); } diff --git a/libdevice/device_math.h b/libdevice/device_math.h index 01085013dae57..1dd1c674d58f3 100644 --- a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -52,6 +52,12 @@ float __devicelib_ceilf(float x); DEVICE_EXTERN_C double __devicelib_ceil(double x); +DEVICE_EXTERN_C +long int __devicelib_lround(double x); + +DEVICE_EXTERN_C +long int __devicelib_lroundf(float x); + DEVICE_EXTERN_C float __devicelib_copysignf(float x, float y); diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index 49832ef966b5f..dc94bb22ada76 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -71,6 +71,9 @@ double __devicelib_modf(double x, double *intpart) { return __spirv_ocl_modf(x, intpart); } +DEVICE_EXTERN_C_INLINE +long int __devicelib_lround(double x) { return __spirv_ocl_lround(x); } + DEVICE_EXTERN_C_INLINE double __devicelib_round(double x) { return __spirv_ocl_round(x); } diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 6289126272da4..90b1d90444e86 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -36,6 +36,9 @@ float __devicelib_copysignf(float x, float y) { return __spirv_ocl_copysign(x, y); } +DEVICE_EXTERN_C_INLINE +long int __devicelib_lroundf(float x) { return __spirv_ocl_lround(x); } + DEVICE_EXTERN_C_INLINE float __devicelib_cospif(float x) { return __spirv_ocl_cospi(x); } diff --git a/sycl/include/sycl/detail/builtins/math_functions.inc b/sycl/include/sycl/detail/builtins/math_functions.inc index 8a5ff1b1e47ab..7a7610c0865f6 100644 --- a/sycl/include/sycl/detail/builtins/math_functions.inc +++ b/sycl/include/sycl/detail/builtins/math_functions.inc @@ -141,6 +141,7 @@ BUILTIN_GENF(ONE_ARG, floor) BUILTIN_GENF(ONE_ARG, lgamma) BUILTIN_GENF(ONE_ARG, log1p) BUILTIN_GENF(ONE_ARG, logb) +BUILTIN_GENF(ONE_ARG, lround) BUILTIN_GENF(ONE_ARG, rint) BUILTIN_GENF(ONE_ARG, round) BUILTIN_GENF(ONE_ARG, sinh) diff --git a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp index c4ce47cb79991..b7d234b335771 100644 --- a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp @@ -20,10 +20,10 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 73 +#define TEST_NUM 74 double ref[TEST_NUM] = { - 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, + 3, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -61,6 +61,7 @@ template void device_cmath_test(s::queue &deviceQueue) { T minus_infinity = -INFINITY; double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; + res_access[i++] = std::lround(2.5); res_access[i++] = sycl::exp10(2.0); res_access[i++] = sycl::rsqrt(4.0); res_access[i++] = std::trunc(1.3); diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 54e0c14d08bfa..230f37cec354f 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -22,9 +22,9 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 70 +#define TEST_NUM 71 -float ref[TEST_NUM] = {100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, +float ref[TEST_NUM] = {3, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, @@ -58,6 +58,7 @@ template void device_cmath_test_1(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; + res_access[i++] = std::lround(2.5f); res_access[i++] = sycl::exp10(2.0f); res_access[i++] = sycl::rsqrt(4.0f); res_access[i++] = std::trunc(1.2f); From 0f3b5bd032182c9887293f90c482a0c4459b022e Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Fri, 5 Jul 2024 16:08:08 +0100 Subject: [PATCH 02/14] update --- libclc/generic/include/spirv/spirv_builtins.h | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/libclc/generic/include/spirv/spirv_builtins.h b/libclc/generic/include/spirv/spirv_builtins.h index 5812039a0e73b..700e6821662dd 100644 --- a/libclc/generic/include/spirv/spirv_builtins.h +++ b/libclc/generic/include/spirv/spirv_builtins.h @@ -17035,6 +17035,49 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_round(__clc_vec16_fp16_t); #endif +_CLC_OVERLOAD +_CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_lround(__clc_fp32_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t + __spirv_ocl_lround(__clc_vec2_fp32_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp32_t + __spirv_ocl_lround(__clc_vec3_fp32_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp32_t + __spirv_ocl_lround(__clc_vec4_fp32_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp32_t + __spirv_ocl_lround(__clc_vec8_fp32_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp32_t + __spirv_ocl_lround(__clc_vec16_fp32_t); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD +_CLC_DECL _CLC_CONSTFN __clc_fp64_t __spirv_ocl_lround(__clc_fp64_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp64_t + __spirv_ocl_lround(__clc_vec2_fp64_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp64_t + __spirv_ocl_lround(__clc_vec3_fp64_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp64_t + __spirv_ocl_lround(__clc_vec4_fp64_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp64_t + __spirv_ocl_lround(__clc_vec8_fp64_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp64_t + __spirv_ocl_lround(__clc_vec16_fp64_t); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD +_CLC_DECL _CLC_CONSTFN __clc_fp16_t __spirv_ocl_lround(__clc_fp16_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t + __spirv_ocl_lround(__clc_vec2_fp16_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t + __spirv_ocl_lround(__clc_vec3_fp16_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t + __spirv_ocl_lround(__clc_vec4_fp16_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t + __spirv_ocl_lround(__clc_vec8_fp16_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t + __spirv_ocl_lround(__clc_vec16_fp16_t); +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_rsqrt(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t From 3f8bf92063179196b32f78ca017d9d49b99b3b4b Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Tue, 9 Jul 2024 03:14:00 +0100 Subject: [PATCH 03/14] update, rm and keep only generoc --- libclc/amdgcn-amdhsa/libspirv/SOURCES | 1 - libclc/amdgcn-amdhsa/libspirv/math/lround.cl | 30 -------------------- libclc/ptx-nvidiacl/include/libdevice.h | 2 -- libclc/ptx-nvidiacl/libspirv/SOURCES | 1 - libclc/ptx-nvidiacl/libspirv/math/lround.cl | 17 ----------- 5 files changed, 51 deletions(-) delete mode 100644 libclc/amdgcn-amdhsa/libspirv/math/lround.cl delete mode 100644 libclc/ptx-nvidiacl/libspirv/math/lround.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES index 0681454a03ae8..2d334ee4e326e 100644 --- a/libclc/amdgcn-amdhsa/libspirv/SOURCES +++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES @@ -50,7 +50,6 @@ math/log2.cl math/log10.cl math/log1p.cl math/logb.cl -math/lround.cl math/modf.cl math/nextafter.cl math/pow.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/math/lround.cl b/libclc/amdgcn-amdhsa/libspirv/math/lround.cl deleted file mode 100644 index e936c5d35f610..0000000000000 --- a/libclc/amdgcn-amdhsa/libspirv/math/lround.cl +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// - // - // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - // See https://llvm.org/LICENSE.txt for license information. - // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - // - //===----------------------------------------------------------------------===// - - #include - #include - - #define __CLC_FUNCTION __spirv_ocl_lround - #define __CLC_BUILTIN __ocml_lround - - float __ocml_lround_f32(float); - #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) - - #ifdef cl_khr_fp64 - #pragma OPENCL EXTENSION cl_khr_fp64 : enable - double __ocml_lround_f64(double); - #define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) - #endif // cl_khr_fp64 - - #ifdef cl_khr_fp16 - #pragma OPENCL EXTENSION cl_khr_fp16 : enable - half __ocml_lround_f16(half); - #define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16) - #endif // cl_khr_fp16 - - #include diff --git a/libclc/ptx-nvidiacl/include/libdevice.h b/libclc/ptx-nvidiacl/include/libdevice.h index 55924942e81aa..5b1c8f6e9bee4 100644 --- a/libclc/ptx-nvidiacl/include/libdevice.h +++ b/libclc/ptx-nvidiacl/include/libdevice.h @@ -260,8 +260,6 @@ double __nv_logb(double); float __nv_logbf(float); float __nv_logf(float); double __nv_longlong_as_double(long); -long int __nv_lround(double); -long int __nv_lroundf(float); int __nv_max(int, int); int __nv_min(int, int); double __nv_modf(double, double *); diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES index 3b528fccc48f2..569da16923c6b 100644 --- a/libclc/ptx-nvidiacl/libspirv/SOURCES +++ b/libclc/ptx-nvidiacl/libspirv/SOURCES @@ -42,7 +42,6 @@ math/log10.cl math/log1p.cl math/log2.cl math/logb.cl -math/lround.cl math/modf.cl math/native_cos.cl math/native_divide.cl diff --git a/libclc/ptx-nvidiacl/libspirv/math/lround.cl b/libclc/ptx-nvidiacl/libspirv/math/lround.cl deleted file mode 100644 index b6240ae5dd0dd..0000000000000 --- a/libclc/ptx-nvidiacl/libspirv/math/lround.cl +++ /dev/null @@ -1,17 +0,0 @@ -//===----------------------------------------------------------------------===// - // - // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - // See https://llvm.org/LICENSE.txt for license information. - // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - // - //===----------------------------------------------------------------------===// - - #include - - #include "../../include/libdevice.h" - #include - - #define __CLC_FUNCTION __spirv_ocl_lround - #define __CLC_BUILTIN __nv_lround - #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f) - #include From c19fe226914bc1a4f930669db66e79a35a572270 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Tue, 9 Jul 2024 15:43:10 +0100 Subject: [PATCH 04/14] update --- libclc/generic/lib/math/lround.cl | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/libclc/generic/lib/math/lround.cl b/libclc/generic/lib/math/lround.cl index c840a945abd56..5b55dd55eed90 100644 --- a/libclc/generic/lib/math/lround.cl +++ b/libclc/generic/lib/math/lround.cl @@ -1,8 +1,24 @@ + #include - #include - - #include - - #define __CLC_BUILTIN __spirv_ocl_lround - #define __CLC_FUNCTION lround - #include +#include + +#include "../../libspirv/math/tables.h" +#include + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#endif // cl_khr_fp64 + +_CLC_OVERLOAD _CLC_DEF long int lround(float x) { + return __spirv_ocl_lround(x); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long int, lround, float); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DEF long int lround(double x) { + return __spirv_ocl_lround(x); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long int, log10, double); +#endif // cl_khr_fp64 From b7a2e11823f9c4ab8d5f361fe3266d25bbeceb2e Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Tue, 9 Jul 2024 15:57:41 +0100 Subject: [PATCH 05/14] update --- libclc/generic/lib/math/lround.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libclc/generic/lib/math/lround.cl b/libclc/generic/lib/math/lround.cl index 5b55dd55eed90..ce39296e87701 100644 --- a/libclc/generic/lib/math/lround.cl +++ b/libclc/generic/lib/math/lround.cl @@ -20,5 +20,5 @@ _CLC_OVERLOAD _CLC_DEF long int lround(double x) { return __spirv_ocl_lround(x); } -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long int, log10, double); +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long int, lround, double); #endif // cl_khr_fp64 From 01b3fcfb1fd29724c1b35d9dd209922330d75795 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Thu, 11 Jul 2024 03:30:21 +0100 Subject: [PATCH 06/14] update new --- .../generic/include/math/lround_builtin.inc | 54 +++++++++++++++++++ libclc/generic/include/math/unary_intrin.inc | 40 +++++++++++++- libclc/generic/include/spirv/spirv_builtins.h | 36 ++++++------- libclc/generic/lib/math/lround.cl | 21 ++------ libclc/generic/libspirv/math/lround.cl | 2 +- 5 files changed, 114 insertions(+), 39 deletions(-) create mode 100644 libclc/generic/include/math/lround_builtin.inc diff --git a/libclc/generic/include/math/lround_builtin.inc b/libclc/generic/include/math/lround_builtin.inc new file mode 100644 index 0000000000000..b3294d5b4fd1e --- /dev/null +++ b/libclc/generic/include/math/lround_builtin.inc @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LROUND_H +#define LROUND_H + +#include "utils.h" +#include + +#ifndef __CLC_BUILTIN +#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION) +#endif + +#ifndef __CLC_BUILTIN_D +#define __CLC_BUILTIN_D __CLC_BUILTIN +#endif + +#ifndef __CLC_BUILTIN_F +#define __CLC_BUILTIN_F __CLC_BUILTIN +#endif + +#ifndef __CLC_BUILTIN_H +#define __CLC_BUILTIN_H __CLC_BUILTIN_F +#endif + +_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_F, float) + +#ifndef __FLOAT_ONLY + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_D, double) + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_H, half) + +#endif + + + +#endif // !__FLOAT_ONLY +#endif // LROUND_H \ No newline at end of file diff --git a/libclc/generic/include/math/unary_intrin.inc b/libclc/generic/include/math/unary_intrin.inc index 532bb1f9d2615..67a7025a39b81 100644 --- a/libclc/generic/include/math/unary_intrin.inc +++ b/libclc/generic/include/math/unary_intrin.inc @@ -1,3 +1,38 @@ +#ifdef __CLC_FUNCTION_lround +#define __CLC_RETURN_TYPE long +#else +#define __CLC_RETURN_TYPE __CLC_INPUT_TYPE +#endif + +#ifdef __CLC_FUNCTION_lround +_CLC_OVERLOAD __CLC_RETURN_TYPE __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32"); +_CLC_OVERLOAD long2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32"); +_CLC_OVERLOAD long3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32"); +_CLC_OVERLOAD long4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32"); +_CLC_OVERLOAD long8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32"); +_CLC_OVERLOAD long16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32"); + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_OVERLOAD __CLC_RETURN_TYPE __CLC_FUNCTION(double d) __asm(__CLC_INTRINSIC ".f64"); +_CLC_OVERLOAD long2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64"); +_CLC_OVERLOAD long3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64"); +_CLC_OVERLOAD long4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64"); +_CLC_OVERLOAD long8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64"); +_CLC_OVERLOAD long16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64"); +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_OVERLOAD __CLC_RETURN_TYPE __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16"); +_CLC_OVERLOAD long2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16"); +_CLC_OVERLOAD long3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16"); +_CLC_OVERLOAD long4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16"); +_CLC_OVERLOAD long8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16"); +_CLC_OVERLOAD long16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16"); +#endif + +#else _CLC_OVERLOAD float __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32"); _CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32"); _CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32"); @@ -16,7 +51,7 @@ _CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64 #endif #ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16: enable +#pragma OPENCL EXTENSION cl_khr_fp16 : enable _CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16"); _CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16"); _CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16"); @@ -24,6 +59,7 @@ _CLC_OVERLOAD half4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16"); _CLC_OVERLOAD half8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16"); _CLC_OVERLOAD half16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16"); #endif +#endif // __CLC_FUNCTION_lround #undef __CLC_FUNCTION -#undef __CLC_INTRINSIC +#undef __CLC_INTRINSIC \ No newline at end of file diff --git a/libclc/generic/include/spirv/spirv_builtins.h b/libclc/generic/include/spirv/spirv_builtins.h index 700e6821662dd..9c57a6c7e0c87 100644 --- a/libclc/generic/include/spirv/spirv_builtins.h +++ b/libclc/generic/include/spirv/spirv_builtins.h @@ -17036,45 +17036,45 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t #endif _CLC_OVERLOAD -_CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_lround(__clc_fp32_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t +_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp32_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t __spirv_ocl_lround(__clc_vec2_fp32_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp32_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t __spirv_ocl_lround(__clc_vec3_fp32_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp32_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t __spirv_ocl_lround(__clc_vec4_fp32_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp32_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t __spirv_ocl_lround(__clc_vec8_fp32_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp32_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t __spirv_ocl_lround(__clc_vec16_fp32_t); #ifdef cl_khr_fp64 _CLC_OVERLOAD -_CLC_DECL _CLC_CONSTFN __clc_fp64_t __spirv_ocl_lround(__clc_fp64_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp64_t +_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp64_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t __spirv_ocl_lround(__clc_vec2_fp64_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp64_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t __spirv_ocl_lround(__clc_vec3_fp64_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp64_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t __spirv_ocl_lround(__clc_vec4_fp64_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp64_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t __spirv_ocl_lround(__clc_vec8_fp64_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp64_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t __spirv_ocl_lround(__clc_vec16_fp64_t); #endif #ifdef cl_khr_fp16 _CLC_OVERLOAD -_CLC_DECL _CLC_CONSTFN __clc_fp16_t __spirv_ocl_lround(__clc_fp16_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t +_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp16_t); +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t __spirv_ocl_lround(__clc_vec2_fp16_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t __spirv_ocl_lround(__clc_vec3_fp16_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t __spirv_ocl_lround(__clc_vec4_fp16_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t __spirv_ocl_lround(__clc_vec8_fp16_t); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t __spirv_ocl_lround(__clc_vec16_fp16_t); #endif diff --git a/libclc/generic/lib/math/lround.cl b/libclc/generic/lib/math/lround.cl index ce39296e87701..f10cf8d84e786 100644 --- a/libclc/generic/lib/math/lround.cl +++ b/libclc/generic/lib/math/lround.cl @@ -2,23 +2,8 @@ #include #include -#include "../../libspirv/math/tables.h" #include -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -#endif // cl_khr_fp64 - -_CLC_OVERLOAD _CLC_DEF long int lround(float x) { - return __spirv_ocl_lround(x); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long int, lround, float); - -#ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DEF long int lround(double x) { - return __spirv_ocl_lround(x); -} - -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long int, lround, double); -#endif // cl_khr_fp64 +#define __CLC_BUILTIN __spirv_ocl_lround +#define __CLC_FUNCTION lround +#include diff --git a/libclc/generic/libspirv/math/lround.cl b/libclc/generic/libspirv/math/lround.cl index 6289dd022ee5f..b50b918291ed4 100644 --- a/libclc/generic/libspirv/math/lround.cl +++ b/libclc/generic/libspirv/math/lround.cl @@ -16,4 +16,4 @@ #undef __CLC_FUNCTION #define __CLC_FUNCTION __spirv_ocl_lround - #include + #include From 05198ba909eaebd623cf48923252d1bdee7f0fc2 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Thu, 11 Jul 2024 13:46:38 +0100 Subject: [PATCH 07/14] update the format --- libdevice/fallback-cmath-fp64.cpp | 2 +- libdevice/fallback-cmath.cpp | 2 +- sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp | 8 ++++---- sycl/test-e2e/DeviceLib/cmath_test.cpp | 10 +++++----- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index dc94bb22ada76..0fc4129e1b02b 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -72,7 +72,7 @@ double __devicelib_modf(double x, double *intpart) { } DEVICE_EXTERN_C_INLINE -long int __devicelib_lround(double x) { return __spirv_ocl_lround(x); } +long int __devicelib_lround(double x) { return __spirv_ocl_lround(x); } DEVICE_EXTERN_C_INLINE double __devicelib_round(double x) { return __spirv_ocl_round(x); } diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 90b1d90444e86..06f986b4cda16 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -37,7 +37,7 @@ float __devicelib_copysignf(float x, float y) { } DEVICE_EXTERN_C_INLINE -long int __devicelib_lroundf(float x) { return __spirv_ocl_lround(x); } +long int __devicelib_lroundf(float x) { return __spirv_ocl_lround(x); } DEVICE_EXTERN_C_INLINE float __devicelib_cospif(float x) { return __spirv_ocl_cospi(x); } diff --git a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp index b7d234b335771..6daa64360ee10 100644 --- a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp @@ -23,10 +23,10 @@ constexpr s::access::mode sycl_write = s::access::mode::write; #define TEST_NUM 74 double ref[TEST_NUM] = { - 3, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, - 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, - 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 3, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, + 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, + 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 230f37cec354f..db33429dfd8d3 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -24,11 +24,11 @@ constexpr s::access::mode sycl_write = s::access::mode::write; #define TEST_NUM 71 -float ref[TEST_NUM] = {3, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, - 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, - 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, - 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref[TEST_NUM] = {3, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, + 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, + 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; From 79c605d2d8cf0e88a1e8b2be9dae8181f44d892d Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Fri, 12 Jul 2024 00:47:40 +0100 Subject: [PATCH 08/14] readding amd lround --- libclc/amdgcn-amdhsa/libspirv/SOURCES | 1 + libclc/amdgcn-amdhsa/libspirv/math/lround.cl | 30 ++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/lround.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES index 2d334ee4e326e..0681454a03ae8 100644 --- a/libclc/amdgcn-amdhsa/libspirv/SOURCES +++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES @@ -50,6 +50,7 @@ math/log2.cl math/log10.cl math/log1p.cl math/logb.cl +math/lround.cl math/modf.cl math/nextafter.cl math/pow.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/math/lround.cl b/libclc/amdgcn-amdhsa/libspirv/math/lround.cl new file mode 100644 index 0000000000000..a83f505990d0e --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/lround.cl @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#define __CLC_FUNCTION __spirv_ocl_lround +#define __CLC_BUILTIN __ocml_lround + +long __ocml_lround_f32(float); +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +long __ocml_lround_f64(double); +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#endif // cl_khr_fp64 + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +long __ocml_lround_f16(half); +#define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16) +#endif // cl_khr_fp16 + +#include \ No newline at end of file From 2a6536ebde28045d918691a3fd67428f794e4da9 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Fri, 12 Jul 2024 02:07:23 +0100 Subject: [PATCH 09/14] adding lround for ptx --- libclc/ptx-nvidiacl/include/libdevice.h | 2 ++ libclc/ptx-nvidiacl/libspirv/SOURCES | 1 + libclc/ptx-nvidiacl/libspirv/math/lround.cl | 17 +++++++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 libclc/ptx-nvidiacl/libspirv/math/lround.cl diff --git a/libclc/ptx-nvidiacl/include/libdevice.h b/libclc/ptx-nvidiacl/include/libdevice.h index 5b1c8f6e9bee4..e559cac6e0160 100644 --- a/libclc/ptx-nvidiacl/include/libdevice.h +++ b/libclc/ptx-nvidiacl/include/libdevice.h @@ -234,6 +234,8 @@ double __nv_ldexp(double, int); float __nv_ldexpf(float, int); double __nv_lgamma(double); float __nv_lgammaf(float); +long __nv_lroundf(float); +long __nv_lround(double); double __nv_ll2double_rd(long); double __nv_ll2double_rn(long); double __nv_ll2double_ru(long); diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES index 569da16923c6b..3b528fccc48f2 100644 --- a/libclc/ptx-nvidiacl/libspirv/SOURCES +++ b/libclc/ptx-nvidiacl/libspirv/SOURCES @@ -42,6 +42,7 @@ math/log10.cl math/log1p.cl math/log2.cl math/logb.cl +math/lround.cl math/modf.cl math/native_cos.cl math/native_divide.cl diff --git a/libclc/ptx-nvidiacl/libspirv/math/lround.cl b/libclc/ptx-nvidiacl/libspirv/math/lround.cl new file mode 100644 index 0000000000000..9a17205af73d1 --- /dev/null +++ b/libclc/ptx-nvidiacl/libspirv/math/lround.cl @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "../../include/libdevice.h" +#include + +#define __CLC_FUNCTION __spirv_ocl_lround +#define __CLC_BUILTIN __nv_round +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f) +#include From 9914c07551c31fc090458870726b6db1a37c28b2 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Fri, 12 Jul 2024 21:49:21 +0100 Subject: [PATCH 10/14] update --- libdevice/cmath_wrapper_fp64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 5d2a8466375cf..f63a32b2dee7f 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -55,7 +55,7 @@ DEVICE_EXTERN_C_INLINE double round(double x) { return __devicelib_round(x); } DEVICE_EXTERN_C_INLINE -double lround(double x) { return __devicelib_lround(x); } +long lround(double x) { return __devicelib_lround(x); } DEVICE_EXTERN_C_INLINE double floor(double x) { return __devicelib_floor(x); } From 203822faa894620255a7ce9505e2de45bb70fe79 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Mon, 22 Jul 2024 00:46:03 +0100 Subject: [PATCH 11/14] last update --- libclc/generic/libspirv/math/lround.cl | 95 +++++++++++++++---- libdevice/fallback-cmath-fp64.cpp | 2 +- libdevice/fallback-cmath.cpp | 2 +- .../sycl/detail/builtins/math_functions.inc | 2 +- 4 files changed, 79 insertions(+), 22 deletions(-) diff --git a/libclc/generic/libspirv/math/lround.cl b/libclc/generic/libspirv/math/lround.cl index b50b918291ed4..8170926c84793 100644 --- a/libclc/generic/libspirv/math/lround.cl +++ b/libclc/generic/libspirv/math/lround.cl @@ -1,19 +1,76 @@ -//===----------------------------------------------------------------------===// - // - // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - // See https://llvm.org/LICENSE.txt for license information. - // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - // - //===----------------------------------------------------------------------===// - - #include - #include - - // Map the llvm intrinsic to an OpenCL function. - #define __CLC_FUNCTION __clc___spirv_ocl_lround - #define __CLC_INTRINSIC "llvm.lround" - #include - - #undef __CLC_FUNCTION - #define __CLC_FUNCTION __spirv_ocl_lround - #include +#include "utils.h" +#include + +#ifndef __CLC_BUILTIN +#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION) +#endif + +#ifndef __CLC_BUILTIN_D +#define __CLC_BUILTIN_D __CLC_BUILTIN +#endif + +#ifndef __CLC_BUILTIN_F +#define __CLC_BUILTIN_F __CLC_BUILTIN +#endif + +#ifndef __CLC_BUILTIN_H +#define __CLC_BUILTIN_H __CLC_BUILTIN_F +#endif + +// Define the lround function for float type +#define _CLC_DEFINE_LROUND_BUILTIN(FUNC, BUILTIN, TYPE) \ +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN long FUNC(TYPE x) { \ + return (long)BUILTIN(x); \ +} + +#define _CLC_DEFINE_LROUND_VECTOR_BUILTIN(FUNC, BUILTIN, VTYPE, RTYPE) \ +_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN RTYPE FUNC(VTYPE x) { \ + return (RTYPE)BUILTIN(x); \ +} + +#define __CLC_FUNCTION lround + +_CLC_DEFINE_LROUND_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, float) + +#ifndef __FLOAT_ONLY + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_DEFINE_LROUND_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, double) +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_DEFINE_LROUND_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, half) +#endif + +#endif // !__FLOAT_ONLY + +// Define lround for vector types of float +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec2_float, __clc_vec2_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec3_float, __clc_vec3_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec4_float, __clc_vec4_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec8_float, __clc_vec8_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec16_float, __clc_vec16_long) + +#ifdef cl_khr_fp64 +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec2_double, __clc_vec2_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec3_double, __clc_vec3_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec4_double, __clc_vec4_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec8_double, __clc_vec8_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec16_double, __clc_vec16_long) +#endif + +#ifdef cl_khr_fp16 +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec2_half, __clc_vec2_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec3_half, __clc_vec3_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec4_half, __clc_vec4_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec8_half, __clc_vec8_long) +_CLC_DEFINE_LROUND_VECTOR_BUILTIN(__spirv_ocl_lround, __spirv_ocl_rint, __clc_vec16_half, __clc_vec16_long) +#endif + +#undef __CLC_FUNCTION +#undef __CLC_BUILTIN +#undef __CLC_BUILTIN_D +#undef __CLC_BUILTIN_F +#undef __CLC_BUILTIN_H diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index 0fc4129e1b02b..e8de9ec6c918e 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -72,7 +72,7 @@ double __devicelib_modf(double x, double *intpart) { } DEVICE_EXTERN_C_INLINE -long int __devicelib_lround(double x) { return __spirv_ocl_lround(x); } +long int __devicelib_lround(double x) { return static_cast(__spirv_ocl_round(x)); }//__spirv_ocl_lround(x); } DEVICE_EXTERN_C_INLINE double __devicelib_round(double x) { return __spirv_ocl_round(x); } diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 06f986b4cda16..89389e58f356d 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -37,7 +37,7 @@ float __devicelib_copysignf(float x, float y) { } DEVICE_EXTERN_C_INLINE -long int __devicelib_lroundf(float x) { return __spirv_ocl_lround(x); } +long int __devicelib_lroundf(float x) { return static_cast(__spirv_ocl_round(x)); }//__spirv_ocl_lround(x); } DEVICE_EXTERN_C_INLINE float __devicelib_cospif(float x) { return __spirv_ocl_cospi(x); } diff --git a/sycl/include/sycl/detail/builtins/math_functions.inc b/sycl/include/sycl/detail/builtins/math_functions.inc index 7a7610c0865f6..803f37d54a5ea 100644 --- a/sycl/include/sycl/detail/builtins/math_functions.inc +++ b/sycl/include/sycl/detail/builtins/math_functions.inc @@ -141,7 +141,7 @@ BUILTIN_GENF(ONE_ARG, floor) BUILTIN_GENF(ONE_ARG, lgamma) BUILTIN_GENF(ONE_ARG, log1p) BUILTIN_GENF(ONE_ARG, logb) -BUILTIN_GENF(ONE_ARG, lround) +//BUILTIN_GENF(ONE_ARG, lround) BUILTIN_GENF(ONE_ARG, rint) BUILTIN_GENF(ONE_ARG, round) BUILTIN_GENF(ONE_ARG, sinh) From 9f8d9d5faf0da7b9f9985cbf3140b755f085fa18 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Mon, 22 Jul 2024 02:27:38 +0100 Subject: [PATCH 12/14] emoving test --- libdevice/fallback-cmath-fp64.cpp | 4 +++- sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp | 12 ++++++------ sycl/test-e2e/DeviceLib/cmath_test.cpp | 14 +++++++------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index e8de9ec6c918e..7e9dafd2f89b5 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -72,7 +72,9 @@ double __devicelib_modf(double x, double *intpart) { } DEVICE_EXTERN_C_INLINE -long int __devicelib_lround(double x) { return static_cast(__spirv_ocl_round(x)); }//__spirv_ocl_lround(x); } +long int __devicelib_lround(double x) { + return static_cast(__spirv_ocl_round(x)); +} //__spirv_ocl_lround(x); } DEVICE_EXTERN_C_INLINE double __devicelib_round(double x) { return __spirv_ocl_round(x); } diff --git a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp index 6daa64360ee10..f9ebb8a597da5 100644 --- a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp @@ -20,13 +20,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 74 +#define TEST_NUM 73 double ref[TEST_NUM] = { - 3, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, - 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, - 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, + 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, + 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -61,7 +61,7 @@ template void device_cmath_test(s::queue &deviceQueue) { T minus_infinity = -INFINITY; double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; - res_access[i++] = std::lround(2.5); + // res_access[i++] = std::lround(2.5); res_access[i++] = sycl::exp10(2.0); res_access[i++] = sycl::rsqrt(4.0); res_access[i++] = std::trunc(1.3); diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index db33429dfd8d3..d8040157cf584 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -22,13 +22,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 71 +#define TEST_NUM 70 -float ref[TEST_NUM] = {3, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, - 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, - 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, - 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref[TEST_NUM] = {100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, + 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, + 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, + 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -58,7 +58,7 @@ template void device_cmath_test_1(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; - res_access[i++] = std::lround(2.5f); + // res_access[i++] = std::lround(2.5f); res_access[i++] = sycl::exp10(2.0f); res_access[i++] = sycl::rsqrt(4.0f); res_access[i++] = std::trunc(1.2f); From 78e8c19ec47794d41abdeffbaeaffa294310e51f Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Mon, 22 Jul 2024 02:34:33 +0100 Subject: [PATCH 13/14] emoving test --- libdevice/fallback-cmath.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 89389e58f356d..e194355121350 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -37,7 +37,9 @@ float __devicelib_copysignf(float x, float y) { } DEVICE_EXTERN_C_INLINE -long int __devicelib_lroundf(float x) { return static_cast(__spirv_ocl_round(x)); }//__spirv_ocl_lround(x); } +long int __devicelib_lroundf(float x) { + return static_cast(__spirv_ocl_round(x)); +} //__spirv_ocl_lround(x); } DEVICE_EXTERN_C_INLINE float __devicelib_cospif(float x) { return __spirv_ocl_cospi(x); } From ceb37368c6ebb8e83a4922d57a2c920a6368db46 Mon Sep 17 00:00:00 2001 From: Marya Sharf Date: Tue, 6 Aug 2024 01:38:08 +0100 Subject: [PATCH 14/14] lround --- sycl/include/sycl/detail/builtins/math_functions.inc | 2 +- sycl/source/builtins/math_functions.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/detail/builtins/math_functions.inc b/sycl/include/sycl/detail/builtins/math_functions.inc index 803f37d54a5ea..7a7610c0865f6 100644 --- a/sycl/include/sycl/detail/builtins/math_functions.inc +++ b/sycl/include/sycl/detail/builtins/math_functions.inc @@ -141,7 +141,7 @@ BUILTIN_GENF(ONE_ARG, floor) BUILTIN_GENF(ONE_ARG, lgamma) BUILTIN_GENF(ONE_ARG, log1p) BUILTIN_GENF(ONE_ARG, logb) -//BUILTIN_GENF(ONE_ARG, lround) +BUILTIN_GENF(ONE_ARG, lround) BUILTIN_GENF(ONE_ARG, rint) BUILTIN_GENF(ONE_ARG, round) BUILTIN_GENF(ONE_ARG, sinh) diff --git a/sycl/source/builtins/math_functions.cpp b/sycl/source/builtins/math_functions.cpp index c840ded374555..717501f056304 100644 --- a/sycl/source/builtins/math_functions.cpp +++ b/sycl/source/builtins/math_functions.cpp @@ -64,6 +64,7 @@ BUILTIN_GENF_CUSTOM(ONE_ARG, exp10, [](auto x) -> decltype(x) { return std::pow(10, x); }) BUILTIN_GENF(ONE_ARG, expm1) BUILTIN_GENF(ONE_ARG, fabs) +BUILTIN_GENF(ONE_ARG, lround) BUILTIN_GENF(TWO_ARGS, fdim) BUILTIN_GENF(ONE_ARG, floor) BUILTIN_GENF(THREE_ARGS, fma)