intel · MaryaSharf · Jul 5, 2024 · Jul 5, 2024 · Jul 9, 2024 · Jul 9, 2024
@@ -50,6 +50,7 @@ math/log2.cl
 math/log10.cl
 math/log1p.cl
 math/logb.cl
+math/lround.cl
 math/modf.cl
 math/nextafter.cl
 math/pow.cl

@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clcmacro.h>
+#include <spirv/spirv.h>
+
+// Name of the function this translation unit defines, and the base name of
+// the routine it forwards to. The per-type suffixes are appended below.
+#define __CLC_FUNCTION __spirv_ocl_lround
+#define __CLC_BUILTIN __ocml_lround
+
+// NOTE(review): these are local forward declarations only; confirm that the
+// device library really exports __ocml_lround_f32/_f64/_f16 with these
+// signatures.
+long __ocml_lround_f32(float);
+#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32)
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+long __ocml_lround_f64(double);
+#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64)
+#endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+long __ocml_lround_f16(half);
+#define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16)
+#endif // cl_khr_fp16
+
+// Presumably expands the macros above into the actual function definitions.
+#include <math/lround_builtin.inc>
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LROUND_H
+#define LROUND_H
+
+#include "utils.h"
+#include <clcmacro.h>
+
+// Builtin-name resolution. The includer may predefine any of these; whatever
+// is missing falls back as follows:
+//   __CLC_BUILTIN   -> __clc_ prefixed onto __CLC_FUNCTION
+//   __CLC_BUILTIN_F -> __CLC_BUILTIN
+//   __CLC_BUILTIN_D -> __CLC_BUILTIN
+//   __CLC_BUILTIN_H -> __CLC_BUILTIN_F
+#if !defined(__CLC_BUILTIN)
+#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION)
+#endif
+
+#if !defined(__CLC_BUILTIN_F)
+#define __CLC_BUILTIN_F __CLC_BUILTIN
+#endif
+
+#if !defined(__CLC_BUILTIN_D)
+#define __CLC_BUILTIN_D __CLC_BUILTIN
+#endif
+
+#if !defined(__CLC_BUILTIN_H)
+#define __CLC_BUILTIN_H __CLC_BUILTIN_F
+#endif
+
+// Single-precision definition: always emitted, returns long.
+_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_F, float)
+
+// The remaining precisions are skipped when the includer sets __FLOAT_ONLY.
+#if !defined(__FLOAT_ONLY)
+
+#if defined(cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_D, double)
+#endif // cl_khr_fp64
+
+#if defined(cl_khr_fp16)
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_H, half)
+#endif // cl_khr_fp16
+
+#endif // !__FLOAT_ONLY
+
+#endif // LROUND_H
@@ -1,3 +1,38 @@
+#ifdef __CLC_FUNCTION_lround
+// lround-style intrinsics take a floating-point argument but return long (or
+// the long vector of matching width), so they get their own prototypes rather
+// than the same-type ones in the #else branch.
+//
+// The return type is written out as `long` rather than going through a helper
+// macro: a helper would have to be #undef'd after every inclusion, and the
+// scalar and vector prototypes now read consistently.
+//
+// NOTE(review): the asm names below reuse the ".fNN"/".vNfNN" suffixes of the
+// same-type intrinsics; confirm they match the mangling of the intrinsic that
+// __CLC_INTRINSIC names for lround.
+_CLC_OVERLOAD long __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32");
+_CLC_OVERLOAD long2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
+_CLC_OVERLOAD long3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
+_CLC_OVERLOAD long4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32");
+_CLC_OVERLOAD long8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32");
+_CLC_OVERLOAD long16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32");
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_OVERLOAD long __CLC_FUNCTION(double d) __asm(__CLC_INTRINSIC ".f64");
+_CLC_OVERLOAD long2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64");
+_CLC_OVERLOAD long3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64");
+_CLC_OVERLOAD long4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64");
+_CLC_OVERLOAD long8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
+_CLC_OVERLOAD long16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
+#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_OVERLOAD long __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
+_CLC_OVERLOAD long2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
+_CLC_OVERLOAD long3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
+_CLC_OVERLOAD long4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16");
+_CLC_OVERLOAD long8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16");
+_CLC_OVERLOAD long16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16");
+#endif
+
+#else
 _CLC_OVERLOAD float __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32");
 _CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
 _CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
@@ -16,14 +51,15 @@ _CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64
 #endif
 
 #ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16: enable
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
 _CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
 _CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
 _CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
 _CLC_OVERLOAD half4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16");
 _CLC_OVERLOAD half8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16");
 _CLC_OVERLOAD half16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16");
 #endif
+#endif // __CLC_FUNCTION_lround
 
 #undef __CLC_FUNCTION
-#undef __CLC_INTRINSIC
+#undef __CLC_INTRINSIC
@@ -17035,6 +17035,49 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
  __spirv_ocl_round(__clc_vec16_fp16_t);
 #endif
 
+// __spirv_ocl_lround: one overload per fp32/fp64/fp16 scalar and vector
+// width, each returning the __clc_*int64_t type of the same width. The fp64
+// and fp16 overloads are only declared when the matching extension macro is
+// defined.
+_CLC_OVERLOAD
+_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t
+ __spirv_ocl_lround(__clc_vec2_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t
+ __spirv_ocl_lround(__clc_vec3_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t
+ __spirv_ocl_lround(__clc_vec4_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t
+ __spirv_ocl_lround(__clc_vec8_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t
+ __spirv_ocl_lround(__clc_vec16_fp32_t);
+
+#ifdef cl_khr_fp64
+_CLC_OVERLOAD
+_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp64_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t
+ __spirv_ocl_lround(__clc_vec2_fp64_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t
+ __spirv_ocl_lround(__clc_vec3_fp64_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t
+ __spirv_ocl_lround(__clc_vec4_fp64_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t
+ __spirv_ocl_lround(__clc_vec8_fp64_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t
+ __spirv_ocl_lround(__clc_vec16_fp64_t);
+#endif
+
+#ifdef cl_khr_fp16
+_CLC_OVERLOAD
+_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t
+ __spirv_ocl_lround(__clc_vec2_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t
+ __spirv_ocl_lround(__clc_vec3_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t
+ __spirv_ocl_lround(__clc_vec4_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t
+ __spirv_ocl_lround(__clc_vec8_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t
+ __spirv_ocl_lround(__clc_vec16_fp16_t);
+#endif
+
 _CLC_OVERLOAD
 _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_rsqrt(__clc_fp32_t);
 _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t

@@ -132,6 +132,7 @@ math/log10.cl
 math/log1p.cl
 math/log2.cl
 math/logb.cl
+math/lround.cl
 math/mad.cl
 math/maxmag.cl
 math/minmag.cl

@@ -0,0 +1,9 @@
+
+#include <clc/clc.h>
+#include <spirv/spirv.h>
+
+#include <clcmacro.h>
+
+// Configure the shared template: the function to define is `lround` and the
+// builtin it calls is `__spirv_ocl_lround`. The included file is presumably
+// what turns these two macros into the actual definitions.
+#define __CLC_BUILTIN __spirv_ocl_lround
+#define __CLC_FUNCTION lround
+#include <math/lround_builtin.inc>
@@ -143,6 +143,7 @@ math/log10.cl
 math/log1p.cl
 math/log2.cl
 math/logb.cl
+math/lround.cl
 math/mad.cl
 math/maxmag.cl
 math/minmag.cl

@@ -0,0 +1,76 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <spirv/spirv.h>
+
+#include "utils.h"
+#include <clcmacro.h>
+
+// Generic __spirv_ocl_lround: round to the nearest integer and return it as
+// long.
+//
+// The rounding step must be `round` (halfway cases away from zero), which is
+// the lround contract. `rint` rounds halfway cases to even in the default
+// rounding mode, so e.g. 2.5 would come back as 2 instead of 3.
+//
+// _CLC_DEFINE_UNARY_BUILTIN is the same helper the shared lround template
+// uses. It is expected to emit the scalar definition (the floating-point
+// result is converted to long by the return statement) and to derive the
+// 2/3/4/8/16-wide overloads element-wise, so no vector cast is needed.
+// NOTE(review): confirm against clcmacro.h.
+_CLC_DEFINE_UNARY_BUILTIN(long, __spirv_ocl_lround, __spirv_ocl_round, float)
+
+// All non-float precisions, scalar and vector, honour __FLOAT_ONLY.
+#ifndef __FLOAT_ONLY
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_DEFINE_UNARY_BUILTIN(long, __spirv_ocl_lround, __spirv_ocl_round, double)
+#endif // cl_khr_fp64
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_DEFINE_UNARY_BUILTIN(long, __spirv_ocl_lround, __spirv_ocl_round, half)
+#endif // cl_khr_fp16
+
+#endif // !__FLOAT_ONLY
@@ -234,6 +234,8 @@ double __nv_ldexp(double, int);
 float __nv_ldexpf(float, int);
 double __nv_lgamma(double);
 float __nv_lgammaf(float);
+// NOTE(review): nothing visible in this change calls these two (the PTX
+// lround.cl forwards to __nv_round/__nv_roundf instead). Confirm that
+// libdevice actually provides __nv_lround/__nv_lroundf, since its
+// documented long-rounding entry points appear to be __nv_llround and
+// __nv_llroundf. Also note the surrounding list looks alphabetical, which
+// would place these after the __nv_ll* entries.
+long __nv_lroundf(float);
+long __nv_lround(double);
 double __nv_ll2double_rd(long);
 double __nv_ll2double_rn(long);
 double __nv_ll2double_ru(long);

@@ -42,6 +42,7 @@ math/log10.cl
 math/log1p.cl
 math/log2.cl
 math/logb.cl
+math/lround.cl
 math/modf.cl
 math/native_cos.cl
 math/native_divide.cl

@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <spirv/spirv.h>
+
+#include "../../include/libdevice.h"
+#include <clcmacro.h>
+
+// Define __spirv_ocl_lround on top of the libdevice rounding routines:
+// __nv_round is the base name and the single-precision variant is built by
+// appending `f` to it. No __CLC_BUILTIN_D/_H is set here, so those are left
+// to whatever defaults the included template applies.
+// NOTE(review): this presumably relies on the template converting the
+// floating-point result of __nv_round/__nv_roundf to long - confirm.
+#define __CLC_FUNCTION __spirv_ocl_lround
+#define __CLC_BUILTIN __nv_round
+#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
+#include <math/lround_builtin.inc>
@@ -39,6 +39,9 @@ extern "C" SYCL_EXTERNAL float __devicelib_fminf(float, float);
 // Forwards both arguments to __devicelib_fminf and returns its result.
 DEVICE_EXTERN_C_INLINE
 float fminf(float x, float y) { return __devicelib_fminf(x, y); }
 
+// Single-precision lround wrapper: forwards x to __devicelib_lroundf and
+// returns its long result.
+// The float variant is named lroundf, as in <math.h>: a C-linkage `lround`
+// taking float would clash with the double-precision wrapper of the same
+// name, and returning double would discard the integer result type.
+DEVICE_EXTERN_C_INLINE
+long lroundf(float x) { return __devicelib_lroundf(x); }
+
 // Forwards x to __devicelib_truncf and returns its result.
 DEVICE_EXTERN_C_INLINE
 float truncf(float x) { return __devicelib_truncf(x); }
 

@@ -54,6 +54,9 @@ double log(double x) { return __devicelib_log(x); }
 // Forwards x to __devicelib_round and returns its result.
 DEVICE_EXTERN_C_INLINE
 double round(double x) { return __devicelib_round(x); }
 
+// Double-precision lround wrapper: hands x to __devicelib_lround and returns
+// the value it produces.
+DEVICE_EXTERN_C_INLINE
+long lround(double x) {
+  long rounded = __devicelib_lround(x);
+  return rounded;
+}
+
 // Forwards x to __devicelib_floor and returns its result.
 DEVICE_EXTERN_C_INLINE
 double floor(double x) { return __devicelib_floor(x); }
 

@@ -52,6 +52,12 @@ float __devicelib_ceilf(float x);
 DEVICE_EXTERN_C
 double __devicelib_ceil(double x);
 
+// Round-to-long entry points: the double-precision variant and its
+// single-precision (`f`-suffixed) counterpart, both returning long int.
+DEVICE_EXTERN_C
+long int __devicelib_lround(double x);
+
+DEVICE_EXTERN_C
+long int __devicelib_lroundf(float x);
+
 DEVICE_EXTERN_C
 float __devicelib_copysignf(float x, float y);
 

@@ -71,6 +71,11 @@ double __devicelib_modf(double x, double *intpart) {
  return __spirv_ocl_modf(x, intpart);
 }
 
+// Rounds x with __spirv_ocl_round and converts the rounded value to long int.
+// NOTE(review): the conversion is only meaningful when the rounded value fits
+// in long int; callers are presumably expected to stay in range.
+DEVICE_EXTERN_C_INLINE
+long int __devicelib_lround(double x) {
+ return static_cast<long int>(__spirv_ocl_round(x));
+}
+
 // Forwards x to __spirv_ocl_round and returns its result.
 DEVICE_EXTERN_C_INLINE
 double __devicelib_round(double x) { return __spirv_ocl_round(x); }
 

@@ -36,6 +36,11 @@ float __devicelib_copysignf(float x, float y) {
  return __spirv_ocl_copysign(x, y);
 }
 
+// Rounds x with __spirv_ocl_round and converts the rounded value to long int.
+// NOTE(review): the conversion is only meaningful when the rounded value fits
+// in long int; callers are presumably expected to stay in range.
+DEVICE_EXTERN_C_INLINE
+long int __devicelib_lroundf(float x) {
+ return static_cast<long int>(__spirv_ocl_round(x));
+}
+
 // Forwards x to __spirv_ocl_cospi and returns its result.
 DEVICE_EXTERN_C_INLINE
 float __devicelib_cospif(float x) { return __spirv_ocl_cospi(x); }