forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 55
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[libc][math][c23] Add cospif16 function (llvm#113001)
Implementation of `cos` for half precision floating point inputs scaled by pi (i.e., `cospi`), correctly rounded for all rounding modes. --------- Co-authored-by: OverMighty <its.overmighty@gmail.com>
- Loading branch information
1 parent
a8398bd
commit 7395ef5
Showing
15 changed files
with
347 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
//===-- Implementation header for cospif16 ----------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_LIBC_SRC_MATH_COSPIF16_H | ||
#define LLVM_LIBC_SRC_MATH_COSPIF16_H | ||
|
||
#include "src/__support/macros/config.h" | ||
#include "src/__support/macros/properties/types.h" | ||
|
||
namespace LIBC_NAMESPACE_DECL { | ||
|
||
float16 cospif16(float16 x); | ||
|
||
} // namespace LIBC_NAMESPACE_DECL | ||
|
||
#endif // LLVM_LIBC_SRC_MATH_SINPIF16_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
//===-- Half-precision cospif function ------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "src/math/cospif16.h" | ||
#include "hdr/errno_macros.h" | ||
#include "hdr/fenv_macros.h" | ||
#include "sincosf16_utils.h" | ||
#include "src/__support/FPUtil/FEnvImpl.h" | ||
#include "src/__support/FPUtil/FPBits.h" | ||
#include "src/__support/FPUtil/cast.h" | ||
#include "src/__support/FPUtil/multiply_add.h" | ||
#include "src/__support/macros/optimization.h" | ||
|
||
namespace LIBC_NAMESPACE_DECL { | ||
|
||
LLVM_LIBC_FUNCTION(float16, cospif16, (float16 x)) { | ||
using FPBits = typename fputil::FPBits<float16>; | ||
FPBits xbits(x); | ||
|
||
uint16_t x_u = xbits.uintval(); | ||
uint16_t x_abs = x_u & 0x7fff; | ||
float xf = x; | ||
|
||
// Range reduction: | ||
// For |x| > 1/32, we perform range reduction as follows: | ||
// Find k and y such that: | ||
// x = (k + y) * 1/32 | ||
// k is an integer | ||
// |y| < 0.5 | ||
// | ||
// This is done by performing: | ||
// k = round(x * 32) | ||
// y = x * 32 - k | ||
// | ||
// Once k and y are computed, we then deduce the answer by the sine of sum | ||
// formula: | ||
// cos(x * pi) = cos((k + y) * pi/32) | ||
// = cos(k * pi/32) * cos(y * pi/32) + | ||
// sin(y * pi/32) * sin(k * pi/32) | ||
|
||
// For signed zeros | ||
if (LIBC_UNLIKELY(x_abs == 0U)) | ||
return fputil::cast<float16>(1.0f); | ||
|
||
// Numbers greater or equal to 2^10 are integers, or infinity, or NaN | ||
if (LIBC_UNLIKELY(x_abs >= 0x6400)) { | ||
if (LIBC_UNLIKELY(x_abs <= 0x67FF)) | ||
return fputil::cast<float16>((x_abs & 0x1) ? -1.0f : 1.0f); | ||
|
||
// Check for NaN or infintiy values | ||
if (LIBC_UNLIKELY(x_abs >= 0x7c00)) { | ||
// If value is equal to infinity | ||
if (x_abs == 0x7c00) { | ||
fputil::set_errno_if_required(EDOM); | ||
fputil::raise_except_if_required(FE_INVALID); | ||
} | ||
|
||
return x + FPBits::quiet_nan().get_val(); | ||
} | ||
|
||
return fputil::cast<float16>(1.0f); | ||
} | ||
|
||
float sin_k, cos_k, sin_y, cosm1_y; | ||
sincospif16_eval(xf, sin_k, cos_k, sin_y, cosm1_y); | ||
|
||
if (LIBC_UNLIKELY(sin_y == 0 && cos_k == 0)) | ||
return fputil::cast<float16>(0.0f); | ||
|
||
// Since, cosm1_y = cos_y - 1, therefore: | ||
// cos(x * pi) = cos_k(cosm1_y) + cos_k - sin_k * sin_y | ||
return fputil::cast<float16>(fputil::multiply_add( | ||
cos_k, cosm1_y, fputil::multiply_add(-sin_k, sin_y, cos_k))); | ||
} | ||
|
||
} // namespace LIBC_NAMESPACE_DECL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
//===-- Collection of utils for sinf16/cosf16 -------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H | ||
#define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H | ||
|
||
#include "src/__support/FPUtil/FPBits.h" | ||
#include "src/__support/FPUtil/PolyEval.h" | ||
#include "src/__support/FPUtil/nearest_integer.h" | ||
#include "src/__support/common.h" | ||
#include "src/__support/macros/config.h" | ||
|
||
namespace LIBC_NAMESPACE_DECL { | ||
|
||
// Lookup table for sin(k * pi / 32) with k = 0, ..., 63. | ||
// Table is generated with Sollya as follows: | ||
// > display = hexadecimmal; | ||
// > for k from 0 to 63 do { round(sin(k * pi/32), SG, RN); }; | ||
constexpr float SIN_K_PI_OVER_32[64] = { | ||
0x0.0p0, 0x1.917a6cp-4, 0x1.8f8b84p-3, 0x1.294062p-2, | ||
0x1.87de2ap-2, 0x1.e2b5d4p-2, 0x1.1c73b4p-1, 0x1.44cf32p-1, | ||
0x1.6a09e6p-1, 0x1.8bc806p-1, 0x1.a9b662p-1, 0x1.c38b3p-1, | ||
0x1.d906bcp-1, 0x1.e9f416p-1, 0x1.f6297cp-1, 0x1.fd88dap-1, | ||
0x1p0, 0x1.fd88dap-1, 0x1.f6297cp-1, 0x1.e9f416p-1, | ||
0x1.d906bcp-1, 0x1.c38b3p-1, 0x1.a9b662p-1, 0x1.8bc806p-1, | ||
0x1.6a09e6p-1, 0x1.44cf32p-1, 0x1.1c73b4p-1, 0x1.e2b5d4p-2, | ||
0x1.87de2ap-2, 0x1.294062p-2, 0x1.8f8b84p-3, 0x1.917a6cp-4, | ||
0x0.0p0, -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294062p-2, | ||
-0x1.87de2ap-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1, -0x1.44cf32p-1, | ||
-0x1.6a09e6p-1, -0x1.8bc806p-1, -0x1.a9b662p-1, -0x1.c38b3p-1, | ||
-0x1.d906bcp-1, -0x1.e9f416p-1, -0x1.f6297ep-1, -0x1.fd88dap-1, | ||
-0x1p0, -0x1.fd88dap-1, -0x1.f6297cp-1, -0x1.e9f416p-1, | ||
-0x1.d906bcp-1, -0x1.c38b3p-1, -0x1.a9b662p-1, -0x1.8bc806p-1, | ||
-0x1.6a09e6p-1, -0x1.44cf32p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2, | ||
-0x1.87de2ap-2, -0x1.294062p-2, -0x1.8f8b84p-3, -0x1.917a6cp-4}; | ||
|
||
LIBC_INLINE int32_t range_reduction_sincospif16(float x, float &y) { | ||
float kf = fputil::nearest_integer(x * 32); | ||
y = fputil::multiply_add<float>(x, 32.0, -kf); | ||
|
||
return static_cast<int32_t>(kf); | ||
} | ||
|
||
LIBC_INLINE void sincospif16_eval(float xf, float &sin_k, float &cos_k, | ||
float &sin_y, float &cosm1_y) { | ||
float y; | ||
int32_t k = range_reduction_sincospif16(xf, y); | ||
|
||
sin_k = SIN_K_PI_OVER_32[k & 63]; | ||
cos_k = SIN_K_PI_OVER_32[(k + 16) & 63]; | ||
|
||
// Recall, after range reduction, -0.5 <= y <= 0.5. For very small values of | ||
// y, calculating sin(y * p/32) can be inaccurate. Generating a polynomial for | ||
// sin(y * p/32)/y instead significantly reduces the relative errors. | ||
float ysq = y * y; | ||
|
||
// Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya | ||
// with: | ||
// > Q = fpminimax(sin(y * pi/32)/y, [|0, 2, 4, 6|], [|SG...|], [0, 0.5]); | ||
sin_y = y * fputil::polyeval(ysq, 0x1.921fb6p-4f, -0x1.4aeabcp-13f, | ||
0x1.a03354p-21f, -0x1.ad02d2p-20f); | ||
|
||
// Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya | ||
// with: | ||
// > P = fpminimax(cos(y * pi/32), [|0, 2, 4, 6|],[|1, SG...|], [0, 0.5]); | ||
cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3ccp-8f, 0x1.03a61ap-18f, | ||
0x1.a6f7a2p-29f); | ||
} | ||
|
||
} // namespace LIBC_NAMESPACE_DECL | ||
|
||
#endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H |
Oops, something went wrong.