Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add support for lround #14456

Draft
wants to merge 14 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libclc/amdgcn-amdhsa/libspirv/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ math/log2.cl
math/log10.cl
math/log1p.cl
math/logb.cl
math/lround.cl
math/modf.cl
math/nextafter.cl
math/pow.cl
Expand Down
30 changes: 30 additions & 0 deletions libclc/amdgcn-amdhsa/libspirv/math/lround.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <clcmacro.h>
#include <spirv/spirv.h>

// AMDGCN implementation of __spirv_ocl_lround.
//
// This file only configures macros; the overload definitions themselves are
// produced by <math/lround_builtin.inc>, included at the bottom.
//   __CLC_FUNCTION   - name of the function being defined.
//   __CLC_BUILTIN    - common prefix of the per-type backend symbols.
//   __CLC_BUILTIN_F/_D/_H - backend symbol used for float/double/half.
//
// NOTE(review): the __ocml_lround_* prototypes below are declared locally
// rather than taken from a header - confirm that the device library really
// exports symbols with these names and a `long` return type.
#define __CLC_FUNCTION __spirv_ocl_lround
#define __CLC_BUILTIN __ocml_lround

// float variant: prefix + "_f32".
long __ocml_lround_f32(float);
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32)

// double variant, only when the fp64 extension is available.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
long __ocml_lround_f64(double);
#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64)
#endif // cl_khr_fp64

// half variant, only when the fp16 extension is available.
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
long __ocml_lround_f16(half);
#define __CLC_BUILTIN_H __CLC_XCONCAT(__CLC_BUILTIN, _f16)
#endif // cl_khr_fp16

// Emit the definitions using the macros configured above.
#include <math/lround_builtin.inc>
54 changes: 54 additions & 0 deletions libclc/generic/include/math/lround_builtin.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Shared body for lround-style builtins: functions that take a floating-point
// argument and return `long`.
//
// Inputs (macros set by the including file):
//   __CLC_FUNCTION  - required; name of the function to define.
//   __CLC_BUILTIN   - optional; callee used for every type unless a per-type
//                     override is given. Defaults to __clc_<__CLC_FUNCTION>.
//   __CLC_BUILTIN_F / __CLC_BUILTIN_D / __CLC_BUILTIN_H
//                   - optional per-type callees for float / double / half.
//   __FLOAT_ONLY    - optional; when defined, only the float overloads are
//                     emitted.
//
// The include guard means this file expands at most once per translation unit.
#ifndef LROUND_H
#define LROUND_H

#include "utils.h"
#include <clcmacro.h>

// Default callee: __clc_ prefixed onto the function name.
#ifndef __CLC_BUILTIN
#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION)
#endif

// Per-type callees fall back to the common one...
#ifndef __CLC_BUILTIN_D
#define __CLC_BUILTIN_D __CLC_BUILTIN
#endif

#ifndef __CLC_BUILTIN_F
#define __CLC_BUILTIN_F __CLC_BUILTIN
#endif

// ...except half, which falls back to the *float* callee.
#ifndef __CLC_BUILTIN_H
#define __CLC_BUILTIN_H __CLC_BUILTIN_F
#endif

// float overloads are always emitted.
// NOTE(review): _CLC_DEFINE_UNARY_BUILTIN comes from clcmacro.h; presumably it
// defines the scalar overload plus the vector overloads - confirm there.
_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_F, float)

#ifndef __FLOAT_ONLY

#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// double overloads.
_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_D, double)

#endif

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// half overloads.
_CLC_DEFINE_UNARY_BUILTIN(long, __CLC_FUNCTION, __CLC_BUILTIN_H, half)

#endif



#endif // !__FLOAT_ONLY
#endif // LROUND_H
40 changes: 38 additions & 2 deletions libclc/generic/include/math/unary_intrin.inc
Original file line number Diff line number Diff line change
@@ -1,3 +1,38 @@
// Select the scalar return type for the declarations below: `long` when the
// includer has defined __CLC_FUNCTION_lround, otherwise __CLC_INPUT_TYPE.
// NOTE(review): neither __CLC_FUNCTION_lround nor __CLC_INPUT_TYPE is defined
// anywhere in this file - confirm the including header provides them.
// `#ifdef __CLC_FUNCTION_lround` tests a macro with that literal name; it does
// not inspect the value of __CLC_FUNCTION.
#ifdef __CLC_FUNCTION_lround
#define __CLC_RETURN_TYPE long
#else
#define __CLC_RETURN_TYPE __CLC_INPUT_TYPE
#endif

// lround-style declarations: floating-point argument, long / longN result.
// Each overload is bound via __asm to the symbol formed from __CLC_INTRINSIC
// plus a per-type suffix (".f32", ".v2f32", ...).
#ifdef __CLC_FUNCTION_lround
_CLC_OVERLOAD __CLC_RETURN_TYPE __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32");
_CLC_OVERLOAD long2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
_CLC_OVERLOAD long3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
_CLC_OVERLOAD long4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32");
_CLC_OVERLOAD long8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32");
_CLC_OVERLOAD long16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32");

// double overloads, only with the fp64 extension.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_OVERLOAD __CLC_RETURN_TYPE __CLC_FUNCTION(double d) __asm(__CLC_INTRINSIC ".f64");
_CLC_OVERLOAD long2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64");
_CLC_OVERLOAD long3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64");
_CLC_OVERLOAD long4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64");
_CLC_OVERLOAD long8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
_CLC_OVERLOAD long16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
#endif

// half overloads, only with the fp16 extension.
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_OVERLOAD __CLC_RETURN_TYPE __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
_CLC_OVERLOAD long2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
_CLC_OVERLOAD long3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
_CLC_OVERLOAD long4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16");
_CLC_OVERLOAD long8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16");
_CLC_OVERLOAD long16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16");
#endif

#else
_CLC_OVERLOAD float __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32");
_CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
_CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
Expand All @@ -16,14 +51,15 @@ _CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64
#endif

#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16: enable
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
_CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
_CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
_CLC_OVERLOAD half4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16");
_CLC_OVERLOAD half8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16");
_CLC_OVERLOAD half16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16");
#endif
#endif // __CLC_FUNCTION_lround

// This file is consumed by being re-included with different settings, so
// reset every per-inclusion macro before returning to the includer. Without
// the last two #undefs, a preceding lround inclusion would leave
// __CLC_FUNCTION_lround / __CLC_RETURN_TYPE defined and every later inclusion
// would silently take the lround (long-returning) branch or redefine
// __CLC_RETURN_TYPE with a different value.
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
#undef __CLC_FUNCTION_lround
#undef __CLC_RETURN_TYPE
43 changes: 43 additions & 0 deletions libclc/generic/include/spirv/spirv_builtins.h
Original file line number Diff line number Diff line change
Expand Up @@ -17035,6 +17035,49 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__spirv_ocl_round(__clc_vec16_fp16_t);
#endif

// __spirv_ocl_lround: floating-point argument, 64-bit integer result.
// Declared for the scalar type and for 2/3/4/8/16-element vectors; the result
// vector has the same element count as the argument.

// fp32 overloads (always available).
_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t
__spirv_ocl_lround(__clc_vec2_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t
__spirv_ocl_lround(__clc_vec3_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t
__spirv_ocl_lround(__clc_vec4_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t
__spirv_ocl_lround(__clc_vec8_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t
__spirv_ocl_lround(__clc_vec16_fp32_t);

// fp64 overloads (require cl_khr_fp64).
#ifdef cl_khr_fp64
_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp64_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t
__spirv_ocl_lround(__clc_vec2_fp64_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t
__spirv_ocl_lround(__clc_vec3_fp64_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t
__spirv_ocl_lround(__clc_vec4_fp64_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t
__spirv_ocl_lround(__clc_vec8_fp64_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t
__spirv_ocl_lround(__clc_vec16_fp64_t);
#endif

// fp16 overloads (require cl_khr_fp16).
#ifdef cl_khr_fp16
_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_int64_t __spirv_ocl_lround(__clc_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_int64_t
__spirv_ocl_lround(__clc_vec2_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_int64_t
__spirv_ocl_lround(__clc_vec3_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_int64_t
__spirv_ocl_lround(__clc_vec4_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_int64_t
__spirv_ocl_lround(__clc_vec8_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_int64_t
__spirv_ocl_lround(__clc_vec16_fp16_t);
#endif

_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_rsqrt(__clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
Expand Down
1 change: 1 addition & 0 deletions libclc/generic/lib/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ math/log10.cl
math/log1p.cl
math/log2.cl
math/logb.cl
math/lround.cl
math/mad.cl
math/maxmag.cl
math/minmag.cl
Expand Down
9 changes: 9 additions & 0 deletions libclc/generic/lib/math/lround.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

#include <clc/clc.h>
#include <spirv/spirv.h>

#include <clcmacro.h>

// OpenCL-level `lround`: every overload forwards to the __spirv_ocl_lround
// overload for the same argument type. The definitions are generated by
// lround_builtin.inc from the two macros below.
#define __CLC_BUILTIN __spirv_ocl_lround
#define __CLC_FUNCTION lround
#include <math/lround_builtin.inc>
1 change: 1 addition & 0 deletions libclc/generic/libspirv/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ math/log10.cl
math/log1p.cl
math/log2.cl
math/logb.cl
math/lround.cl
math/mad.cl
math/maxmag.cl
math/minmag.cl
Expand Down
76 changes: 76 additions & 0 deletions libclc/generic/libspirv/math/lround.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "utils.h"
#include <clcmacro.h>
#include <spirv/spirv.h>

// Generic (target-independent) __spirv_ocl_lround.
//
// lround rounds to the nearest integer with halfway cases rounded away from
// zero, so the value is produced by __spirv_ocl_round and then converted to
// long. __spirv_ocl_rint must NOT be used here: it rounds according to the
// current rounding mode (ties-to-even by default), giving e.g. 2 instead of 3
// for an input of 2.5.
//
// The definitions are generated by the shared lround_builtin.inc, exactly as
// the target-specific implementations do. That include:
//   - emits real definitions (not bare declarations) for the scalar overloads,
//   - builds the vector overloads from the scalar one instead of casting a
//     floating-point vector to an integer vector, which OpenCL C rejects,
//   - uses the longN / floatN / doubleN / halfN types that match the
//     declarations in spirv_builtins.h,
//   - keeps the fp64 / fp16 overloads behind their extension checks and the
//     __FLOAT_ONLY switch.
// With only __CLC_BUILTIN set, float, double and half all resolve to the
// matching __spirv_ocl_round overload.
#define __CLC_FUNCTION __spirv_ocl_lround
#define __CLC_BUILTIN __spirv_ocl_round
#include <math/lround_builtin.inc>

// Do not leak the configuration macros past this file.
#undef __CLC_FUNCTION
#undef __CLC_BUILTIN
#undef __CLC_BUILTIN_D
#undef __CLC_BUILTIN_F
#undef __CLC_BUILTIN_H
2 changes: 2 additions & 0 deletions libclc/ptx-nvidiacl/include/libdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ double __nv_ldexp(double, int);
float __nv_ldexpf(float, int);
double __nv_lgamma(double);
float __nv_lgammaf(float);
long __nv_lroundf(float);
long __nv_lround(double);
double __nv_ll2double_rd(long);
double __nv_ll2double_rn(long);
double __nv_ll2double_ru(long);
Expand Down
1 change: 1 addition & 0 deletions libclc/ptx-nvidiacl/libspirv/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ math/log10.cl
math/log1p.cl
math/log2.cl
math/logb.cl
math/lround.cl
math/modf.cl
math/native_cos.cl
math/native_divide.cl
Expand Down
17 changes: 17 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/math/lround.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <spirv/spirv.h>

#include "../../include/libdevice.h"
#include <clcmacro.h>

// NVPTX implementation of __spirv_ocl_lround, generated by
// lround_builtin.inc from the macros below.
//   double -> __nv_round   (via __CLC_BUILTIN)
//   float  -> __nv_roundf  (__CLC_BUILTIN with an "f" suffix)
//   half   -> not set here, so lround_builtin.inc falls back to the float
//             callee.
// The callees return floating-point values; the result becomes `long` through
// the return-type conversion in the generated wrapper.
// NOTE(review): libdevice.h gains __nv_lround / __nv_lroundf declarations in
// the same change, but they are not referenced here - confirm which mapping
// was intended and whether those symbols exist in libdevice.
#define __CLC_FUNCTION __spirv_ocl_lround
#define __CLC_BUILTIN __nv_round
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
#include <math/lround_builtin.inc>
3 changes: 3 additions & 0 deletions libdevice/cmath_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ extern "C" SYCL_EXTERNAL float __devicelib_fminf(float, float);
// C-linkage wrapper for fminf: forwards both operands to __devicelib_fminf.
DEVICE_EXTERN_C_INLINE
float fminf(float x, float y) {
  return __devicelib_fminf(x, y);
}

// C-linkage wrapper for the single-precision lround.
//
// This is the float translation unit, so the wrapper must be `lroundf`
// (returning long) and must forward to the float entry point
// __devicelib_lroundf. Declaring it as `double lround(float)` would both
// return the wrong type and clash with the C-linkage `long lround(double)`
// wrapper provided for fp64.
DEVICE_EXTERN_C_INLINE
long lroundf(float x) { return __devicelib_lroundf(x); }

// C-linkage wrapper for truncf: forwards to __devicelib_truncf.
DEVICE_EXTERN_C_INLINE
float truncf(float x) {
  return __devicelib_truncf(x);
}

Expand Down
3 changes: 3 additions & 0 deletions libdevice/cmath_wrapper_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ double log(double x) { return __devicelib_log(x); }
// C-linkage wrapper for double-precision round: forwards to
// __devicelib_round.
DEVICE_EXTERN_C_INLINE
double round(double x) {
  return __devicelib_round(x);
}

// C-linkage wrapper for double-precision lround: forwards to
// __devicelib_lround and returns its integer result unchanged.
DEVICE_EXTERN_C_INLINE
long lround(double x) {
  return __devicelib_lround(x);
}

// C-linkage wrapper for double-precision floor: forwards to
// __devicelib_floor.
DEVICE_EXTERN_C_INLINE
double floor(double x) {
  return __devicelib_floor(x);
}

Expand Down
6 changes: 6 additions & 0 deletions libdevice/device_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ float __devicelib_ceilf(float x);
// Double-precision ceil entry point.
DEVICE_EXTERN_C
double __devicelib_ceil(double x);

// lround entry point for double input; returns a long int.
DEVICE_EXTERN_C
long int __devicelib_lround(double x);

// lround entry point for float input; returns a long int.
DEVICE_EXTERN_C
long int __devicelib_lroundf(float x);

// Single-precision copysign entry point.
DEVICE_EXTERN_C
float __devicelib_copysignf(float x, float y);

Expand Down
5 changes: 5 additions & 0 deletions libdevice/fallback-cmath-fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ double __devicelib_modf(double x, double *intpart) {
return __spirv_ocl_modf(x, intpart);
}

// Fallback lround for double: round in floating point with
// __spirv_ocl_round, then convert the result to long int.
DEVICE_EXTERN_C_INLINE
long int __devicelib_lround(double x) {
  const auto rounded = __spirv_ocl_round(x);
  return static_cast<long int>(rounded);
}

// Fallback round for double: forwards to __spirv_ocl_round.
DEVICE_EXTERN_C_INLINE
double __devicelib_round(double x) {
  return __spirv_ocl_round(x);
}

Expand Down
5 changes: 5 additions & 0 deletions libdevice/fallback-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ float __devicelib_copysignf(float x, float y) {
return __spirv_ocl_copysign(x, y);
}

// Fallback lround for float: round in floating point with
// __spirv_ocl_round, then convert the result to long int.
DEVICE_EXTERN_C_INLINE
long int __devicelib_lroundf(float x) {
  const auto rounded = __spirv_ocl_round(x);
  return static_cast<long int>(rounded);
}

// Fallback cospi for float: forwards to __spirv_ocl_cospi.
DEVICE_EXTERN_C_INLINE
float __devicelib_cospif(float x) {
  return __spirv_ocl_cospi(x);
}

Expand Down
Loading
Loading