From 9f00286c18fcc61e30401f9e7725c350ccb56cdb Mon Sep 17 00:00:00 2001
From: Paul Balanca
Date: Mon, 12 Aug 2024 16:07:27 +0100
Subject: [PATCH] Add to/from E8M0 scale MX format conversion.

Implementation using bitmasking & shifting, so hopefully decently fast!
---
 jax_scalify/core/pow2.py             | 24 ++++++++++-
 jax_scalify/quantization/__init__.py |  2 +
 jax_scalify/quantization/scale.py    | 62 ++++++++++++++++++++++++++++
 tests/quantization/test_scale.py     | 58 ++++++++++++++++++++++++++
 4 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 jax_scalify/quantization/__init__.py
 create mode 100644 jax_scalify/quantization/scale.py
 create mode 100644 tests/quantization/test_scale.py

diff --git a/jax_scalify/core/pow2.py b/jax_scalify/core/pow2.py
index a9f1edc..ec2fe6d 100644
--- a/jax_scalify/core/pow2.py
+++ b/jax_scalify/core/pow2.py
@@ -4,6 +4,7 @@
 from functools import partial
 from typing import Any, Dict, Optional, Sequence, Tuple, Union
 
+import jax.numpy as jnp
 import numpy as np
 from jax import core
 from jax.interpreters import mlir
@@ -14,6 +15,9 @@
 
 # Exponent bits masking.
 _exponent_bits_mask: Dict[Any, NDArray[Any]] = {
+    np.dtype(jnp.bfloat16): np.packbits(
+        np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1], dtype=np.uint8)
+    ).view(np.int16),
     np.dtype(np.float16): np.packbits(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0], dtype=np.uint8)).view(
         np.int16
     ),
@@ -31,6 +35,24 @@
 """
 
 
+def dtype_exponent_mask(dtype: DTypeLike, sign_bit: bool = False) -> NDArray[Any]:
+    """Get the exponent mask for a given Numpy/JAX dtype.
+
+    Args:
+        dtype: Numpy/JAX dtype.
+        sign_bit: Include sign bit in the mask.
+    Returns:
+        Array mask as integer dtype.
+    """
+    # Normalize: `np.float32` and `np.dtype(np.float32)` hash differently as dict keys.
+    dtype = np.dtype(dtype)
+    mask = _exponent_bits_mask[dtype]
+    if sign_bit:
+        # Negating the float view (exponent-only bits, i.e. +inf) sets the sign bit.
+        mask = (-mask.view(dtype)).view(mask.dtype)
+    return mask
+
+
 def pow2_decompose_round_down_impl(vin: Array, scale_dtype: DTypeLike) -> Array:
     """Pow-2 decompose with rounding down.
 
@@ -42,7 +64,7 @@ def pow2_decompose_round_down_impl(vin: Array, scale_dtype: DTypeLike) -> Array:
     # NOTE: `jnp.frexp` is buggy for subnormals.
     dtype = np.dtype(np.float32)
     minval = np.finfo(dtype).smallest_normal
-    exponent_mask = _exponent_bits_mask[dtype]
+    exponent_mask = dtype_exponent_mask(dtype)
     intdtype = exponent_mask.dtype
     val = vin.astype(dtype)
     # Masking mantissa bits, keeping only the exponents ones.
diff --git a/jax_scalify/quantization/__init__.py b/jax_scalify/quantization/__init__.py
new file mode 100644
index 0000000..83707b8
--- /dev/null
+++ b/jax_scalify/quantization/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
+from .scale import as_e8m0, from_e8m0  # noqa: F401
diff --git a/jax_scalify/quantization/scale.py b/jax_scalify/quantization/scale.py
new file mode 100644
index 0000000..330d30f
--- /dev/null
+++ b/jax_scalify/quantization/scale.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
+import jax.numpy as jnp
+import numpy as np
+
+from jax_scalify.core import Array, DTypeLike, get_numpy_api
+from jax_scalify.core.pow2 import dtype_exponent_mask
+
+
+def pow2_truncate(arr: Array) -> Array:
+    """Convert an Array to a power of 2, using mantissa truncation.
+
+    NOTE: the sign is preserved, and all sub-normal values are flushed to zero.
+    """
+    np_api = get_numpy_api(arr)
+    # Mask covering the sign & exponent bits: only the mantissa gets zeroed.
+    exponent_mask = dtype_exponent_mask(arr.dtype, sign_bit=True)
+    intdtype = exponent_mask.dtype
+    # Bitwise masking on the integer view, then reinterpreting as the original dtype.
+    arr_pow2 = np_api.bitwise_and(arr.view(intdtype), exponent_mask).view(arr.dtype).reshape(arr.shape)
+    return arr_pow2
+
+
+def as_e8m0(arr: Array) -> Array:
+    """Convert an Array to e8m0 format (i.e. power of two values).
+
+    This function is only implementing a truncation + saturation variant, in line with
+    the MX OCP format.
+
+    Args:
+        arr: Input array (FP32 or BF16).
+    Returns:
+        E8M0 array (as uint8).
+    """
+    np_api = get_numpy_api(arr)
+    # NOTE: FP16 not supported, as it only has 5 exponent bits.
+    assert arr.dtype in {np.dtype(jnp.bfloat16), np.dtype(jnp.float32)}
+    # Saturation => negative values saturating to min value (i.e. zero bits) in E8M0.
+    arr = np_api.maximum(arr, np.array(0, arr.dtype))
+    arr = pow2_truncate(arr)
+
+    # Exponent bits: 7 lowest bits of the last byte + highest bit of the one before (assuming little-endian).
+    arr_u8 = arr.view(np.uint8).reshape((*arr.shape, -1))
+    arr_e8m0 = np_api.bitwise_or(np_api.left_shift(arr_u8[..., -1], 1), np_api.right_shift(arr_u8[..., -2], 7))
+    return arr_e8m0
+
+
+def from_e8m0(arr: Array, dtype: DTypeLike) -> Array:
+    """Convert an Array of e8m0 values (i.e. power of two values) to a given dtype.
+
+    Args:
+        arr: E8M0 array (assuming uint8 storage dtype).
+        dtype: Output dtype. FP32 or BF16 supported.
+    Returns:
+        Converted output (NaN where the input is `0xFF`, the E8M0 NaN encoding).
+    """
+    np_api = get_numpy_api(arr)
+    assert arr.dtype == np.uint8
+    assert np.dtype(dtype) in {np.dtype(jnp.bfloat16), np.dtype(jnp.float32)}
+    # FP32 intermediate: avoid issues with 7 mantissa bits in BF16. TODO: more efficient implementation!
+    pow2 = np_api.exp2(arr.astype(np.float32) - 127)
+    # `0xFF` is NaN in the MX OCP format: `exp2` alone would overflow to +inf.
+    return np_api.where(arr == 255, np.float32(np.nan), pow2).astype(dtype)
diff --git a/tests/quantization/test_scale.py b/tests/quantization/test_scale.py
new file mode 100644
index 0000000..fd11c63
--- /dev/null
+++ b/tests/quantization/test_scale.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2024 Graphcore Ltd. All rights reserved.
+import chex
+import ml_dtypes
+import numpy as np
+import numpy.testing as npt
+from absl.testing import parameterized
+
+from jax_scalify.quantization.scale import as_e8m0, from_e8m0, pow2_truncate
+
+
+class QuantizationScaleTests(chex.TestCase):
+    @parameterized.parameters(
+        {"dtype": np.float16},
+        {"dtype": np.float32},
+        {"dtype": ml_dtypes.bfloat16},
+    )
+    def test__pow2_truncate__proper_result(self, dtype):
+        vin = np.array([-2, 0, 2, 1, 9, 15]).astype(dtype)
+        vout = pow2_truncate(vin)
+        assert vout.dtype == vin.dtype
+        npt.assert_array_equal(vout, [-2.0, 0.0, 2.0, 1.0, 8.0, 8.0])
+
+    @parameterized.parameters(
+        # NOTE: FP16 not covered, `as_e8m0` only accepts FP32 & BF16 inputs.
+        {"dtype": np.float32},
+        {"dtype": ml_dtypes.bfloat16},
+    )
+    def test__as_e8m0__positive_values(self, dtype):
+        vin = np.array([0.6, 2, 1, 9, 15, 127]).astype(dtype).reshape((-1, 2))
+        vout = as_e8m0(vin)
+        assert vout.dtype == np.uint8
+        assert vout.shape == vin.shape
+        npt.assert_array_equal(vout, np.log2(pow2_truncate(vin)) + 127)
+
+    @parameterized.parameters(
+        # NOTE: FP16 not covered, `as_e8m0` only accepts FP32 & BF16 inputs.
+        {"dtype": np.float32},
+        {"dtype": ml_dtypes.bfloat16},
+    )
+    def test__as_e8m0__negative_values(self, dtype):
+        vin = np.array([-0.1, -3, 0, 2**-127]).astype(dtype)
+        vout = as_e8m0(vin)
+        assert vout.dtype == np.uint8
+        # NOTE: uint8(0) is the smallest positive scale in E8M0 (2**-127): negative, zero & sub-normal inputs map to it.
+        npt.assert_array_equal(vout, np.uint8(0))
+
+    @parameterized.parameters(
+        # NOTE: FP16 not covered, `as_e8m0` & `from_e8m0` only accept FP32 & BF16.
+        {"dtype": np.float32},
+        {"dtype": ml_dtypes.bfloat16},
+    )
+    def test__from_e8m0(self, dtype):
+        vin = np.array([2**-127, 0.25, 1, 2, 8, 2**127.0]).astype(dtype).reshape((-1, 2))
+        vin_e8m0 = as_e8m0(vin)
+        vout = from_e8m0(vin_e8m0, dtype)
+        assert vin.dtype == vout.dtype
+        assert vout.shape == vin.shape
+        npt.assert_array_equal(vout, vin)