[libc][math][c23] Add cospif16 function (llvm#113001)

Implementation of `cos` for half precision floating point inputs scaled by pi (i.e., `cospi`), correctly rounded for all rounding modes. --------- Co-authored-by: OverMighty <its.overmighty@gmail.com>
ROCm · Oct 29, 2024 · 7395ef5 · 7395ef5
1 parent a8398bd
commit 7395ef5
Show file tree

Hide file tree

Showing 15 changed files with 347 additions and 88 deletions.
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
@@ -607,6 +607,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.canonicalizef16
     libc.src.math.ceilf16
     libc.src.math.copysignf16
+    libc.src.math.cospif16
     # TODO: aarch64 bug
     # Please see https://github.com/llvm/llvm-project/pull/100632#issuecomment-2258772681
     # libc.src.math.expf16

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -611,6 +611,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ceilf16
     libc.src.math.copysignf16
     libc.src.math.coshf16
+    libc.src.math.cospif16
     libc.src.math.exp10f16
     libc.src.math.exp10m1f16
     libc.src.math.exp2f16

diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
@@ -280,7 +280,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | cosh      | |check|          |                 |                        | |check|              |                        | 7.12.5.4               | F.10.2.4                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| cospi     | |check|          |                 |                        |                      |                        | 7.12.4.12              | F.10.1.12                  |
+| cospi     | |check|          |                 |                        | |check|              |                        | 7.12.4.12              | F.10.1.12                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | dsqrt     | N/A              | N/A             |   |check|              | N/A                  |       |check|\*        | 7.12.14.6              | F.10.11                    |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml
@@ -206,6 +206,13 @@ functions:
     return_type: float
     arguments:
       - type: float
+  - name: cospif16
+    standards:
+      - stdc
+    return_type: _Float16
+    arguments:
+      - type: _Float16
+    guard: LIBC_TYPES_HAS_FLOAT16
   - name: coshf16
     standards:
       - stdc

diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -95,6 +95,7 @@ add_math_entrypoint_object(coshf)
 add_math_entrypoint_object(coshf16)
 
 add_math_entrypoint_object(cospif)
+add_math_entrypoint_object(cospif16)
 
 add_math_entrypoint_object(daddl)
 add_math_entrypoint_object(daddf128)

diff --git a/libc/src/math/cospif16.h b/libc/src/math/cospif16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for cospif16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_COSPIF16_H
+#define LLVM_LIBC_SRC_MATH_COSPIF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 cospif16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_SINPIF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -351,6 +351,17 @@ add_header_library(
     libc.src.__support.common
 )
 
+add_header_library(
+  sincosf16_utils
+  HDRS
+    sincosf16_utils.h
+  DEPENDS
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.common  
+)
+
 add_header_library(
   sincos_eval
   HDRS
@@ -422,6 +433,25 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  cospif16
+  SRCS
+    cospif16.cpp
+  HDRS
+    ../cospif16.h
+  DEPENDS
+    .sincosf16_utils
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.macros.optimization
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   sin
   SRCS
@@ -535,14 +565,14 @@ add_entrypoint_object(
   HDRS
     ../sinpif16.h
   DEPENDS
-    libc.src.__support.common
+    .sincosf16_utils
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.cast
     libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits 
+    libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.nearest_integer
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.macros.properties.types
+    libc.src.__support.macros.optimization
   COMPILE_OPTIONS
     -O3
 )

diff --git a/libc/src/math/generic/cospif16.cpp b/libc/src/math/generic/cospif16.cpp
@@ -0,0 +1,81 @@
+//===-- Half-precision cospif function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/cospif16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "sincosf16_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(float16, cospif16, (float16 x)) {
+  using FPBits = typename fputil::FPBits<float16>;
+  FPBits xbits(x);
+
+  uint16_t x_u = xbits.uintval();
+  uint16_t x_abs = x_u & 0x7fff;
+  float xf = x;
+
+  // Range reduction:
+  // For |x| > 1/32, we perform range reduction as follows:
+  // Find k and y such that:
+  //   x = (k + y) * 1/32
+  //   k is an integer
+  //   |y| < 0.5
+  //
+  // This is done by performing:
+  //   k = round(x * 32)
+  //   y = x * 32 - k
+  //
+  // Once k and y are computed, we then deduce the answer by the sine of sum
+  // formula:
+  //   cos(x * pi) = cos((k + y) * pi/32)
+  //               = cos(k * pi/32) * cos(y * pi/32) +
+  //                 sin(y * pi/32) * sin(k * pi/32)
+
+  // For signed zeros
+  if (LIBC_UNLIKELY(x_abs == 0U))
+    return fputil::cast<float16>(1.0f);
+
+  // Numbers greater or equal to 2^10 are integers, or infinity, or NaN
+  if (LIBC_UNLIKELY(x_abs >= 0x6400)) {
+    if (LIBC_UNLIKELY(x_abs <= 0x67FF))
+      return fputil::cast<float16>((x_abs & 0x1) ? -1.0f : 1.0f);
+
+    // Check for NaN or infintiy values
+    if (LIBC_UNLIKELY(x_abs >= 0x7c00)) {
+      // If value is equal to infinity
+      if (x_abs == 0x7c00) {
+        fputil::set_errno_if_required(EDOM);
+        fputil::raise_except_if_required(FE_INVALID);
+      }
+
+      return x + FPBits::quiet_nan().get_val();
+    }
+
+    return fputil::cast<float16>(1.0f);
+  }
+
+  float sin_k, cos_k, sin_y, cosm1_y;
+  sincospif16_eval(xf, sin_k, cos_k, sin_y, cosm1_y);
+
+  if (LIBC_UNLIKELY(sin_y == 0 && cos_k == 0))
+    return fputil::cast<float16>(0.0f);
+
+  // Since, cosm1_y = cos_y - 1, therefore:
+  // 	cos(x * pi) = cos_k(cosm1_y) + cos_k - sin_k * sin_y
+  return fputil::cast<float16>(fputil::multiply_add(
+      cos_k, cosm1_y, fputil::multiply_add(-sin_k, sin_y, cos_k)));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/sincosf16_utils.h b/libc/src/math/generic/sincosf16_utils.h
@@ -0,0 +1,77 @@
+//===-- Collection of utils for sinf16/cosf16 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
+// Table is generated with Sollya as follows:
+// > display = hexadecimmal;
+// > for k from 0 to 63 do { round(sin(k * pi/32), SG, RN); };
+constexpr float SIN_K_PI_OVER_32[64] = {
+    0x0.0p0,        0x1.917a6cp-4,  0x1.8f8b84p-3,  0x1.294062p-2,
+    0x1.87de2ap-2,  0x1.e2b5d4p-2,  0x1.1c73b4p-1,  0x1.44cf32p-1,
+    0x1.6a09e6p-1,  0x1.8bc806p-1,  0x1.a9b662p-1,  0x1.c38b3p-1,
+    0x1.d906bcp-1,  0x1.e9f416p-1,  0x1.f6297cp-1,  0x1.fd88dap-1,
+    0x1p0,          0x1.fd88dap-1,  0x1.f6297cp-1,  0x1.e9f416p-1,
+    0x1.d906bcp-1,  0x1.c38b3p-1,   0x1.a9b662p-1,  0x1.8bc806p-1,
+    0x1.6a09e6p-1,  0x1.44cf32p-1,  0x1.1c73b4p-1,  0x1.e2b5d4p-2,
+    0x1.87de2ap-2,  0x1.294062p-2,  0x1.8f8b84p-3,  0x1.917a6cp-4,
+    0x0.0p0,        -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294062p-2,
+    -0x1.87de2ap-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1, -0x1.44cf32p-1,
+    -0x1.6a09e6p-1, -0x1.8bc806p-1, -0x1.a9b662p-1, -0x1.c38b3p-1,
+    -0x1.d906bcp-1, -0x1.e9f416p-1, -0x1.f6297ep-1, -0x1.fd88dap-1,
+    -0x1p0,         -0x1.fd88dap-1, -0x1.f6297cp-1, -0x1.e9f416p-1,
+    -0x1.d906bcp-1, -0x1.c38b3p-1,  -0x1.a9b662p-1, -0x1.8bc806p-1,
+    -0x1.6a09e6p-1, -0x1.44cf32p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
+    -0x1.87de2ap-2, -0x1.294062p-2, -0x1.8f8b84p-3, -0x1.917a6cp-4};
+
+LIBC_INLINE int32_t range_reduction_sincospif16(float x, float &y) {
+  float kf = fputil::nearest_integer(x * 32);
+  y = fputil::multiply_add<float>(x, 32.0, -kf);
+
+  return static_cast<int32_t>(kf);
+}
+
+LIBC_INLINE void sincospif16_eval(float xf, float &sin_k, float &cos_k,
+                                  float &sin_y, float &cosm1_y) {
+  float y;
+  int32_t k = range_reduction_sincospif16(xf, y);
+
+  sin_k = SIN_K_PI_OVER_32[k & 63];
+  cos_k = SIN_K_PI_OVER_32[(k + 16) & 63];
+
+  // Recall, after range reduction, -0.5 <= y <= 0.5. For very small values of
+  // y, calculating sin(y * p/32) can be inaccurate. Generating a polynomial for
+  // sin(y * p/32)/y instead significantly reduces the relative errors.
+  float ysq = y * y;
+
+  // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya
+  // with:
+  // > Q = fpminimax(sin(y * pi/32)/y, [|0, 2, 4, 6|], [|SG...|], [0, 0.5]);
+  sin_y = y * fputil::polyeval(ysq, 0x1.921fb6p-4f, -0x1.4aeabcp-13f,
+                               0x1.a03354p-21f, -0x1.ad02d2p-20f);
+
+  // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya
+  // with:
+  // > P = fpminimax(cos(y * pi/32), [|0, 2, 4, 6|],[|1, SG...|], [0, 0.5]);
+  cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3ccp-8f, 0x1.03a61ap-18f,
+                                   0x1.a6f7a2p-29f);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H