Skip to content

Commit

Permalink
[APFloat] Add APFloat support for E8M0 type
Browse files Browse the repository at this point in the history
This patch adds an APFloat type for unsigned E8M0 format.
This format is used for representing the "scale-format"
in the MX specification:
https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf

This format does not support {Inf, denorms, zeroes}.
Like FP32, this format has an 8-bit exponent (which occupies all of
its bits) and a bias of 127. However, it differs from IEEE FP32
in that the minExponent is -127 (instead of -126).
The APFloat utility functions are updated
to handle these constraints for this format.

* The bias calculation is different and convertIEEE* APIs
  are updated to handle this.
* Since there are no significand bits, the
  isSignificandAll{Zeroes/Ones} methods are updated accordingly.
* Although the format has no significand bits, the precision
  field in the fltSemantics is set to 1 for consistency with
  APFloat's internal representation.
* Many utility functions are updated to handle the fact that this
  format does not support Zero.
* Provide a separate initFromAPInt() implementation to
  handle the quirks of the format.
* Add specific tests to verify the range of values for this format.

Signed-off-by: Durgadoss R <durgadossr@nvidia.com>
  • Loading branch information
durga4github committed Sep 16, 2024
1 parent b54be00 commit 850fae2
Show file tree
Hide file tree
Showing 3 changed files with 445 additions and 42 deletions.
35 changes: 35 additions & 0 deletions llvm/include/llvm/ADT/APFloat.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,12 @@ struct APFloatBase {
// improved range compared to half (16-bit) formats, at (potentially)
// greater throughput than single precision (32-bit) formats.
S_FloatTF32,
// 8-bit floating point number in which all 8 bits encode the exponent,
// matching the exponent width of FP32. There are no zeroes, no infinities,
// and no denormal values. NaN is represented with all bits set to 1.
// Bias is 127.
// This represents the scale data type in the MX specification from
// https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
S_Float8E8M0FN,
// 6-bit floating point number with bit layout S1E3M2. Unlike IEEE-754
// types, there are no infinity or NaN values. The format is detailed in
// https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
Expand Down Expand Up @@ -229,6 +235,7 @@ struct APFloatBase {
static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE;
static const fltSemantics &Float8E3M4() LLVM_READNONE;
static const fltSemantics &FloatTF32() LLVM_READNONE;
static const fltSemantics &Float8E8M0FN() LLVM_READNONE;
static const fltSemantics &Float6E3M2FN() LLVM_READNONE;
static const fltSemantics &Float6E2M3FN() LLVM_READNONE;
static const fltSemantics &Float4E2M1FN() LLVM_READNONE;
Expand Down Expand Up @@ -591,6 +598,7 @@ class IEEEFloat final : public APFloatBase {
unsigned int significandLSB() const;
unsigned int significandMSB() const;
void zeroSignificand();
unsigned int getNumHighBits() const;
/// Return true if the significand excluding the integral bit is all ones.
bool isSignificandAllOnes() const;
bool isSignificandAllOnesExceptLSB() const;
Expand Down Expand Up @@ -652,6 +660,7 @@ class IEEEFloat final : public APFloatBase {
APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const;
APInt convertFloat8E3M4APFloatToAPInt() const;
APInt convertFloatTF32APFloatToAPInt() const;
APInt convertFloat8E8M0FNAPFloatToAPInt() const;
APInt convertFloat6E3M2FNAPFloatToAPInt() const;
APInt convertFloat6E2M3FNAPFloatToAPInt() const;
APInt convertFloat4E2M1FNAPFloatToAPInt() const;
Expand All @@ -672,6 +681,7 @@ class IEEEFloat final : public APFloatBase {
void initFromFloat8E4M3B11FNUZAPInt(const APInt &api);
void initFromFloat8E3M4APInt(const APInt &api);
void initFromFloatTF32APInt(const APInt &api);
void initFromFloat8E8M0FNAPInt(const APInt &api);
void initFromFloat6E3M2FNAPInt(const APInt &api);
void initFromFloat6E2M3FNAPInt(const APInt &api);
void initFromFloat4E2M1FNAPInt(const APInt &api);
Expand Down Expand Up @@ -1079,6 +1089,9 @@ class APFloat : public APFloatBase {
/// \param Semantics - type float semantics
static APFloat getAllOnesValue(const fltSemantics &Semantics);

/// Returns true if the given semantics supports either NaN or Infinity.
///
/// \param Sem - type float semantics
static bool hasNanOrInf(const fltSemantics &Sem) {
switch (SemanticsToEnum(Sem)) {
default:
Expand All @@ -1091,6 +1104,28 @@ class APFloat : public APFloatBase {
}
}

/// Tells whether the value Zero is representable in the given semantics.
/// Every semantics other than Float8E8M0FN is reported as having a Zero.
///
/// \param Sem - type float semantics
static bool hasZero(const fltSemantics &Sem) {
  // Semantics objects are compared by address, not by content.
  const bool IsE8M0 = (&Sem == &Float8E8M0FN());
  return !IsE8M0;
}

/// Tells whether the given semantics carries an actual significand.
/// Only Float8E8M0FN is reported as lacking one.
///
/// \param Sem - type float semantics
static bool hasSignificand(const fltSemantics &Sem) {
  // Identity check on the semantics object itself.
  if (&Sem == &Float8E8M0FN())
    return false;
  return true;
}

/// Tells whether the given semantics consists of an exponent alone,
/// i.e. it is the exact negation of hasSignificand().
///
/// \param Sem - type float semantics
static bool hasExponentOnly(const fltSemantics &Sem) {
  const bool HasSig = hasSignificand(Sem);
  return HasSig ? false : true;
}

/// Used to insert APFloat objects, or objects that contain APFloat objects,
/// into FoldingSets.
void Profile(FoldingSetNodeID &NID) const;
Expand Down
Loading

0 comments on commit 850fae2

Please sign in to comment.