[APFloat] Add APFloat support for E8M0 type

This patch adds an APFloat type for the unsigned E8M0 format. This format is used for representing the "scale-format" in the MX specification:
https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf

This format does not support {Inf, denorms, zeroes}. Like FP32, this format's exponents are 8-bits (all bits here) and the bias value is 127. However, it differs from IEEE-FP32 in that the minExponent is -127 (instead of -126). The APFloat utility functions are updated to handle these constraints for this format:

* The bias calculation is different and convertIEEE* APIs are updated to handle this.
* Since there are no significand bits, the isSignificandAll{Zeroes/Ones} methods are updated accordingly.
* Although the format does not have any precision, the precision bit in the fltSemantics is set to 1 for consistency with APFloat's internal representation.
* Many utility functions are updated to handle the fact that this format does not support Zero.
* Provide a separate initFromAPInt() implementation to handle the quirks of the format.
* Add specific tests to verify the range of values for this format.

Signed-off-by: Durgadoss R <durgadossr@nvidia.com>
llvm · Sep 25, 2024 · 886ac70 · 886ac70
1 parent 2a29d24
commit 886ac70
Show file tree

Hide file tree

Showing 3 changed files with 478 additions and 25 deletions.
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
@@ -195,6 +195,13 @@ struct APFloatBase {
  // improved range compared to half (16-bit) formats, at (potentially)
  // greater throughput than single precision (32-bit) formats.
  S_FloatTF32,
+ // 8-bit floating point number with (all the) 8 bits for the exponent
+ // like in FP32. There are no zeroes, no infinities, and no denormal values.
+ // This format has unsigned representation only. (U -> Unsigned only).
+ // NaN is represented with all bits set to 1. Bias is 127.
+ // This format represents the scale data type in the MX specification from:
+ // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
+ S_Float8E8M0FNU,
  // 6-bit floating point number with bit layout S1E3M2. Unlike IEEE-754
  // types, there are no infinity or NaN values. The format is detailed in
  // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
@@ -229,6 +236,7 @@ struct APFloatBase {
  static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE;
  static const fltSemantics &Float8E3M4() LLVM_READNONE;
  static const fltSemantics &FloatTF32() LLVM_READNONE;
+ static const fltSemantics &Float8E8M0FNU() LLVM_READNONE;
  static const fltSemantics &Float6E3M2FN() LLVM_READNONE;
  static const fltSemantics &Float6E2M3FN() LLVM_READNONE;
  static const fltSemantics &Float4E2M1FN() LLVM_READNONE;
@@ -591,6 +599,7 @@ class IEEEFloat final : public APFloatBase {
  unsigned int significandLSB() const;
  unsigned int significandMSB() const;
  void zeroSignificand();
+ unsigned int getNumHighBits() const;
  /// Return true if the significand excluding the integral bit is all ones.
  bool isSignificandAllOnes() const;
  bool isSignificandAllOnesExceptLSB() const;
@@ -652,6 +661,7 @@ class IEEEFloat final : public APFloatBase {
  APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const;
  APInt convertFloat8E3M4APFloatToAPInt() const;
  APInt convertFloatTF32APFloatToAPInt() const;
+ APInt convertFloat8E8M0FNUAPFloatToAPInt() const;
  APInt convertFloat6E3M2FNAPFloatToAPInt() const;
  APInt convertFloat6E2M3FNAPFloatToAPInt() const;
  APInt convertFloat4E2M1FNAPFloatToAPInt() const;
@@ -672,6 +682,7 @@ class IEEEFloat final : public APFloatBase {
  void initFromFloat8E4M3B11FNUZAPInt(const APInt &api);
  void initFromFloat8E3M4APInt(const APInt &api);
  void initFromFloatTF32APInt(const APInt &api);
+ void initFromFloat8E8M0FNUAPInt(const APInt &api);
  void initFromFloat6E3M2FNAPInt(const APInt &api);
  void initFromFloat6E2M3FNAPInt(const APInt &api);
  void initFromFloat4E2M1FNAPInt(const APInt &api);
@@ -1079,6 +1090,9 @@ class APFloat : public APFloatBase {
  /// \param Semantics - type float semantics
  static APFloat getAllOnesValue(const fltSemantics &Semantics);
 
+ /// Returns true if the given semantics supports either NaN or Infinity.
+ ///
+ /// \param Sem - type float semantics
  static bool hasNanOrInf(const fltSemantics &Sem) {
  switch (SemanticsToEnum(Sem)) {
  default:
@@ -1091,6 +1105,13 @@ class APFloat : public APFloatBase {
  }
  }
 
+ /// Reports whether \p Sem carries an actual significand.
+ /// Only Float8E8M0FNU lacks one; the check compares semantics object identity.
+ /// \param Sem - type float semantics
+ static bool hasSignificand(const fltSemantics &Sem) {
+ return !(&Sem == &Float8E8M0FNU());
+ }
+
  /// Used to insert APFloat objects, or objects that contain APFloat objects,
  /// into FoldingSets.
  void Profile(FoldingSetNodeID &NID) const;