From 337d3a60ba4263756cea3a0fd2885806b39f1299 Mon Sep 17 00:00:00 2001 From: Andrew Fitzgibbon Date: Fri, 4 Oct 2024 19:29:57 +0100 Subject: [PATCH 1/2] doc --- src/gfloat/types.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/gfloat/types.py b/src/gfloat/types.py index 6172372..f67fad1 100644 --- a/src/gfloat/types.py +++ b/src/gfloat/types.py @@ -8,30 +8,31 @@ class RoundMode(Enum): """ Enum for IEEE-754 rounding modes. - Result r is obtained from input v depending on rounding mode as follows + Result :math:`r` is obtained from input :math:`v` depending on rounding mode, as described for each member below. + + Notes on stochastic rounding: + + StochasticFast implements a stochastic rounding scheme that is unbiased in + infinite precision, but biased when the quantity to be rounded is computed to + a finite precision. + + StochasticFastest implements a stochastic rounding scheme that is biased + (the rounded value is on average farther from zero than the true value). + + With a lot of SRbits (say 8 or more), these biases are negligible, and there + may be some efficiency advantage in using StochasticFast or StochasticFastest. + """ - TowardZero = 1 #: :math:`\max \{ r ~ s.t. ~ |r| \le |v| \}` - TowardNegative = 2 #: :math:`\max \{ r ~ s.t. ~ r \le v \}` - TowardPositive = 3 #: :math:`\min \{ r ~ s.t. ~ r \ge v \}` + TowardZero = 1 #: Return the largest :math:`r` such that :math:`|r| \le |v|` + TowardNegative = 2 #: Return the largest :math:`r` such that :math:`r \le v` + TowardPositive = 3 #: Return the smallest :math:`r` such that :math:`r \ge v` TiesToEven = 4 #: Round to nearest, ties to even TiesToAway = 5 #: Round to nearest, ties away from zero - Stochastic = 6 #: Stochastic rounding - StochasticFast = 7 #: Stochastic rounding - faster, but biased, see [Note 1]. - StochasticFastest = 8 #: Stochastic rounding - incorrect, see [Note 1]. 
- StochasticOdd = 9 #: Stochastic rounding, RTNO before comparison - - -# [Note 1]: -# StochasticFast implements a stochastic rounding scheme that is unbiased in -# infinite precision, but biased when the quantity to be rounded is computed to -# a finite precision. -# -# StochasticFastest implements a stochastic rounding scheme that is biased -# (the rounded value is on average farther from zero than the true value). -# -# With a lot of SRbits (say 8 or more), these biases are negligible, and there -# may be some efficiency advantage in using StochasticFast or StochasticFastest. + Stochastic = 6 #: Stochastic rounding, RTNE before comparison + StochasticOdd = 7 #: Stochastic rounding, RTNO before comparison + StochasticFast = 8 #: Stochastic rounding - faster, but biased + StochasticFastest = 9 #: Stochastic rounding - even faster, but more biased class FloatClass(Enum): From 27bbc147345ebc626b3af836d4211fd21613d97e Mon Sep 17 00:00:00 2001 From: Andrew Fitzgibbon Date: Fri, 4 Oct 2024 19:53:54 +0100 Subject: [PATCH 2/2] Improve docs --- docs/source/05-stochastic-rounding.ipynb | 8 +- docs/source/formats.rst | 65 ++++++++++++++++ docs/source/index.rst | 95 ++++++------------------ 3 files changed, 93 insertions(+), 75 deletions(-) diff --git a/docs/source/05-stochastic-rounding.ipynb b/docs/source/05-stochastic-rounding.ipynb index aa1425f..f81ecbb 100644 --- a/docs/source/05-stochastic-rounding.ipynb +++ b/docs/source/05-stochastic-rounding.ipynb @@ -464,7 +464,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Implementation\n", + "## Implementation of SR\n", "\n", "The second part of this notebook goes deeper into the implementation of SR,\n", "and explores some subtleties that are not generally brought out in discussions of practical implementations. 
These subtleties might be summarized as\n", @@ -474,7 +474,7 @@ "\n", "Note that these details are independent of the quality of the random number generator (RNG) — all of the issues discussed here happen with perfect RNGs.\n", "\n", - "## Case 0: Infinite-precision inputs and real-valued random variables\n", + "### Case 0: Infinite-precision inputs and real-valued random variables\n", "\n", "To begin our discussion, let's start with \"high-school\" rounding,\n", "where we implement round-to-nearest with code like\n", @@ -493,7 +493,7 @@ "but needs to change subtly when they are supplied in fixed precision, as is true \n", "in a floating point system.\n", " \n", - "## Case 1: Infinite-precision inputs and limited-precision random variables\n", + "### Case 1: Infinite-precision inputs and limited-precision random variables\n", "\n", "Let's assume that `rand()` produces only `S` bits of randomness at every call,\n", "i.e. that its implementation is something like\n", @@ -868,7 +868,7 @@ "Good news. `SRFast` (the curve on the left) seems to have fixed things...\n", "What could be wrong? Why is that not the default?\n", "\n", - "## Case 2: Finite-precision inputs and limited-precision random variables\n", + "### Case 2: Finite-precision inputs and limited-precision random variables\n", "\n", "The answer is that we are still modelling the inputs `v` as being infinite precision (well, they are float64 here, but that's pretty much infinite precision).\n", "\n", diff --git a/docs/source/formats.rst b/docs/source/formats.rst index d6820e2..b7536a5 100644 --- a/docs/source/formats.rst +++ b/docs/source/formats.rst @@ -5,6 +5,71 @@ Defined Formats .. module:: gfloat.formats +Format parameters +----------------- + +This table (from example notebook :doc:`value-stats <02-value-stats>`) shows how +gfloat has been used to tabulate properties of various floating point formats. 
+ + - name: Format + - B: Bits in the format + - P: Precision in bits + - E: Exponent field width in bits + - smallest: Smallest positive value + - smallest_normal: Smallest positive normal value, n/a if no finite values are normal + - max: Largest finite value + - num_nans: Number of NaN values + - infs: Number of infinities (2 or 0) + +======== === === === =========== ================= ============ =========== ====== +name B P E smallest smallest_normal max num_nans infs +======== === === === =========== ================= ============ =========== ====== +ocp_e2m1 4 2 2 0.5 1 6 0 0 +ocp_e2m3 6 4 2 0.125 1 7.5 0 0 +ocp_e3m2 6 3 3 0.0625 0.25 28 0 0 +ocp_e4m3 8 4 4 ≈0.0019531 0.015625 448 2 0 +ocp_e5m2 8 3 5 ≈1.5259e-05 ≈6.1035e-05 57344 6 2 +p3109_p1 8 1 7 ≈2.1684e-19 ≈2.1684e-19 ≈9.2234e+18 1 2 +p3109_p2 8 2 6 ≈2.3283e-10 ≈4.6566e-10 ≈2.1475e+09 1 2 +p3109_p3 8 3 5 ≈7.6294e-06 ≈3.0518e-05 49152 1 2 +p3109_p4 8 4 4 ≈0.00097656 0.0078125 224 1 2 +p3109_p5 8 5 3 0.0078125 0.125 15 1 2 +p3109_p6 8 6 2 0.015625 0.5 3.875 1 2 +binary16 16 11 5 ≈5.9605e-08 ≈6.1035e-05 65504 2046 2 +bfloat16 16 8 8 ≈9.1835e-41 ≈1.1755e-38 ≈3.3895e+38 254 2 +binary32 32 24 8 ≈1.4013e-45 ≈1.1755e-38 ≈3.4028e+38 ≈1.6777e+07 2 +binary64 64 53 11 4.9407e-324 ≈2.2251e-308 ≈1.7977e+308 ≈9.0072e+15 2 +ocp_e8m0 8 1 8 ≈5.8775e-39 ≈5.8775e-39 ≈1.7014e+38 1 0 +ocp_int8 8 8 0 0.015625 n/a ≈ 1.9844 0 0 +======== === === === =========== ================= ============ =========== ====== + +In the above table, values which are not exact are indicated with the "≈" symbol. 
+And here's the same table, but with values which don't render exactly as short floats +printed as rationals times powers of 2: + +======== === === === =========== ================= ======================================== ====================================== ====== +name B P E smallest smallest_normal max num_nans infs +======== === === === =========== ================= ======================================== ====================================== ====== +ocp_e2m1 4 2 2 0.5 1 6 0 0 +ocp_e2m3 6 4 2 0.125 1 7.5 0 0 +ocp_e3m2 6 3 3 0.0625 0.25 28 0 0 +ocp_e4m3 8 4 4 2^-9 0.015625 448 2 0 +ocp_e5m2 8 3 5 2^-16 2^-14 57344 6 2 +p3109_p1 8 1 7 2^-62 2^-62 2^63 1 2 +p3109_p2 8 2 6 2^-32 2^-31 2^31 1 2 +p3109_p3 8 3 5 2^-17 2^-15 49152 1 2 +p3109_p4 8 4 4 2^-10 0.0078125 224 1 2 +p3109_p5 8 5 3 0.0078125 0.125 15 1 2 +p3109_p6 8 6 2 0.015625 0.5 3.875 1 2 +binary16 16 11 5 2^-24 2^-14 65504 2046 2 +bfloat16 16 8 8 2^-133 2^-126 255/128*2^127 254 2 +binary32 32 24 8 2^-149 2^-126 16777215/8388608*2^127 8388607/4194304*2^23 2 +binary64 64 53 11 4.9407e-324 2^-1022 9007199254740991/9007199254740992*2^1024 4503599627370495/4503599627370496*2^53 2 +ocp_e8m0 8 1 8 2^-127 2^-127 2^127 1 0 +ocp_int8 8 8 0 0.015625 n/a 127/64*2^0 0 0 +======== === === === =========== ================= ======================================== ====================================== ====== + + IEEE 754 Formats ---------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index cc8441f..b564a6a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,8 +10,23 @@ GFloat: Generic floating point formats in Python ================================================ GFloat is designed to allow experimentation with a variety of floating-point -formats in Python. Formats are parameterized by the primary IEEE-754 parameters -of: +formats in Python. 
Headline features: + + * A wide variety of floating point formats defined in :py:mod:`gfloat.formats` + + - IEEE 754, BFloat, OCP FP8 and MX, IEEE P3109 + + * Conversion between floats under numerous rounding modes + + - Scalar code is optimized for readability + - Array code is faster, and can operate on NumPy, JAX, or PyTorch arrays. + + * Notebooks useful for teaching and exploring float formats + +Provided Formats +---------------- + +Formats are parameterized by the primary IEEE-754 parameters of: * Width in bits (k) * Precision (p) @@ -55,75 +70,13 @@ As well as block formats from |ocp_mx_link|. IEEE P3109 -Supported rounding modes include: - - * Directed modes: Toward Zero, Toward Positive, Toward Negative - * Round-to-nearest, with Ties to Even or Ties to Away - * Stochastic rounding, with specified numbers of random bits - - -Example -------- -This table (from example notebook :doc:`value-stats <02-value-stats>`) shows how -gfloat has been used to tabulate properties of various floating point formats. 
- - - name: Format - - B: Bits in the format - - P: Precision in bits - - E: Exponent field width in bits - - smallest: Smallest positive value - - smallest_normal: Smallest positive normal value, n/a if no finite values are normal - - max: Largest finite value - - num_nans: Number of NaN values - - num_infs: Number of infinities (2 or 0) - -======== === === === =========== ================= ============ =========== ====== -name B P E smallest smallest_normal max num_nans infs -======== === === === =========== ================= ============ =========== ====== -ocp_e2m1 4 2 2 0.5 1 6 0 0 -ocp_e2m3 6 4 2 0.125 1 7.5 0 0 -ocp_e3m2 6 3 3 0.0625 0.25 28 0 0 -ocp_e4m3 8 4 4 ≈0.0019531 0.015625 448 2 0 -ocp_e5m2 8 3 5 ≈1.5259e-05 ≈6.1035e-05 57344 6 2 -p3109_p1 8 1 7 ≈2.1684e-19 ≈2.1684e-19 ≈9.2234e+18 1 2 -p3109_p2 8 2 6 ≈2.3283e-10 ≈4.6566e-10 ≈2.1475e+09 1 2 -p3109_p3 8 3 5 ≈7.6294e-06 ≈3.0518e-05 49152 1 2 -p3109_p4 8 4 4 ≈0.00097656 0.0078125 224 1 2 -p3109_p5 8 5 3 0.0078125 0.125 15 1 2 -p3109_p6 8 6 2 0.015625 0.5 3.875 1 2 -binary16 16 11 5 ≈5.9605e-08 ≈6.1035e-05 65504 2046 2 -bfloat16 16 8 8 ≈9.1835e-41 ≈1.1755e-38 ≈3.3895e+38 254 2 -binary32 32 24 8 ≈1.4013e-45 ≈1.1755e-38 ≈3.4028e+38 ≈1.6777e+07 2 -binary64 64 53 11 4.9407e-324 ≈2.2251e-308 ≈1.7977e+308 ≈9.0072e+15 2 -ocp_e8m0 8 1 8 ≈5.8775e-39 ≈5.8775e-39 ≈1.7014e+38 1 0 -ocp_int8 8 8 0 0.015625 n/a ≈ 1.9844 0 0 -======== === === === =========== ================= ============ =========== ====== - -In the above table, values which are not exact are indicated with the "≈" symbol. 
-And here's the same table, but with values which don't render exactly as short floats -printed as rationals times powers of 2: - -======== === === === =========== ================= ======================================== ====================================== ====== -name B P E smallest smallest_normal max num_nans infs -======== === === === =========== ================= ======================================== ====================================== ====== -ocp_e2m1 4 2 2 0.5 1 6 0 0 -ocp_e2m3 6 4 2 0.125 1 7.5 0 0 -ocp_e3m2 6 3 3 0.0625 0.25 28 0 0 -ocp_e4m3 8 4 4 2^-9 0.015625 448 2 0 -ocp_e5m2 8 3 5 2^-16 2^-14 57344 6 2 -p3109_p1 8 1 7 2^-62 2^-62 2^63 1 2 -p3109_p2 8 2 6 2^-32 2^-31 2^31 1 2 -p3109_p3 8 3 5 2^-17 2^-15 49152 1 2 -p3109_p4 8 4 4 2^-10 0.0078125 224 1 2 -p3109_p5 8 5 3 0.0078125 0.125 15 1 2 -p3109_p6 8 6 2 0.015625 0.5 3.875 1 2 -binary16 16 11 5 2^-24 2^-14 65504 2046 2 -bfloat16 16 8 8 2^-133 2^-126 255/128*2^127 254 2 -binary32 32 24 8 2^-149 2^-126 16777215/8388608*2^127 8388607/4194304*2^23 2 -binary64 64 53 11 4.9407e-324 2^-1022 9007199254740991/9007199254740992*2^1024 4503599627370495/4503599627370496*2^53 2 -ocp_e8m0 8 1 8 2^-127 2^-127 2^127 1 0 -ocp_int8 8 8 0 0.015625 n/a 127/64*2^0 0 0 -======== === === === =========== ================= ======================================== ====================================== ====== +Rounding modes +-------------- + +Various rounding modes: + * Directed modes: Toward Zero, Toward Positive, Toward Negative + * Round-to-nearest, with Ties to Even or Ties to Away + * Stochastic rounding, with specified numbers of random bits See Also