From 5bfd3c197d81c66dd07eb4d11aec99df2299c02e Mon Sep 17 00:00:00 2001
From: Jingyu Ma <mazong1123@gmail.com>
Date: Tue, 30 Jan 2018 01:13:31 +0800
Subject: Added Grisu3 algorithm support for double.ToString(). (#14646)

* Added Grisu3 algorithm support for double.ToString().

- Implemented Grisu3 algorithm.
- When calling double.ToString(), try Grisu3 first, if it fails, fall back to Dragon4.

Fix #14478

* Added comments. Fixed a minor bug.

* Optimized the performance in DigitGen.

* Added shortcut for numbers without fraction.

* Updated according to the review comments.

Added more comments. Changed the value of D_1_LOG2_10
---
 src/classlibnative/bcltype/CMakeLists.txt |   2 +
 src/classlibnative/bcltype/diyfp.cpp      |  75 ++++++
 src/classlibnative/bcltype/diyfp.h        |  93 ++++++++
 src/classlibnative/bcltype/fp.h           |  67 ++++++
 src/classlibnative/bcltype/grisu3.cpp     | 381 ++++++++++++++++++++++++++++++
 src/classlibnative/bcltype/grisu3.h       | 161 +++++++++++++
 src/classlibnative/bcltype/number.cpp     | 102 +++-----
 7 files changed, 817 insertions(+), 64 deletions(-)
 create mode 100644 src/classlibnative/bcltype/diyfp.cpp
 create mode 100644 src/classlibnative/bcltype/diyfp.h
 create mode 100644 src/classlibnative/bcltype/fp.h
 create mode 100644 src/classlibnative/bcltype/grisu3.cpp
 create mode 100644 src/classlibnative/bcltype/grisu3.h

diff --git a/src/classlibnative/bcltype/CMakeLists.txt b/src/classlibnative/bcltype/CMakeLists.txt
index 74ccda0a81..e2da2308bb 100644
--- a/src/classlibnative/bcltype/CMakeLists.txt
+++ b/src/classlibnative/bcltype/CMakeLists.txt
@@ -10,6 +10,8 @@ set(BCLTYPE_SOURCES
     bignum.cpp
     currency.cpp
     decimal.cpp
+    diyfp.cpp
+    grisu3.cpp
     windowsruntimebufferhelper.cpp
     number.cpp
     oavariant.cpp
diff --git a/src/classlibnative/bcltype/diyfp.cpp b/src/classlibnative/bcltype/diyfp.cpp
new file mode 100644
index 0000000000..cab242fe13
--- /dev/null
+++ b/src/classlibnative/bcltype/diyfp.cpp
@@ -0,0 +1,75 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: diyfp.cpp
+//
+
+//
+
+#include "diyfp.h"
+#include "fp.h"
+
+void DiyFp::Minus(const DiyFp& rhs)
+{
+    _ASSERTE(m_e == rhs.e());
+    _ASSERTE(m_f >= rhs.f());
+
+    m_f -= rhs.f();
+}
+
+void DiyFp::Minus(const DiyFp& left, const DiyFp& right, DiyFp& result)
+{
+    result = left;
+    result.Minus(right);
+}
+
+void DiyFp::Multiply(const DiyFp& rhs)
+{
+    UINT64 m32 = 0xFFFFFFFF;
+
+    UINT64 a = m_f >> 32;
+    UINT64 b = m_f & m32;
+    UINT64 c = rhs.f() >> 32;
+    UINT64 d = rhs.f() & m32;
+
+    UINT64 ac = a * c;
+    UINT64 bc = b * c;
+    UINT64 ad = a * d;
+    UINT64 bd = b * d;
+
+    UINT64 tmp = (bd >> 32) + (ad & m32) + (bc & m32);
+    tmp += 1U << 31;
+
+    m_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
+    m_e = m_e + rhs.e() + SIGNIFICAND_LENGTH;
+}
+
+void DiyFp::Multiply(const DiyFp& left, const DiyFp& right, DiyFp& result)
+{
+    result = left;
+    result.Multiply(right);
+}
+
+void DiyFp::GenerateNormalizedDiyFp(double value, DiyFp& result)
+{
+    _ASSERTE(value > 0.0);
+
+    UINT64 f = 0;
+    int e = 0;
+    ExtractFractionAndBiasedExponent(value, &f, &e);
+
+    UINT64 normalizeBit = (UINT64)1 << 52;
+    while ((f & normalizeBit) == 0)
+    {
+        f <<= 1;
+        --e;
+    }
+
+    int lengthDiff = DiyFp::SIGNIFICAND_LENGTH - 53;
+    f <<= lengthDiff;
+    e -= lengthDiff;
+
+    result.SetSignificand(f);
+    result.SetExponent(e);
+}
\ No newline at end of file
diff --git a/src/classlibnative/bcltype/diyfp.h b/src/classlibnative/bcltype/diyfp.h
new file mode 100644
index 0000000000..67dedcdcc7
--- /dev/null
+++ b/src/classlibnative/bcltype/diyfp.h
@@ -0,0 +1,93 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: diyfp.h
+//
+
+//
+
+#ifndef _DIYFP_H
+#define _DIYFP_H
+
+#include <clrtypes.h>
+
+// An exteneded floating-point data structure which is required by Grisu3 algorithm.
+// It defines a 64-bit significand and a 32-bit exponent,
+// which is EXTENDED compare to IEEE double precision floating-point number (53 bits significand and 11 bits exponent).
+//
+// Original Grisu algorithm produces suboptimal results. To shorten the output (which is part of Grisu2/Grisu3's job),
+// we need additional 11 bits of the significand f and larger exponent e (A larger exponent range is used to avoid overflow. A 32-bit exponent is far big enough).
+// To fully understand how Grisu3 uses this data structure to get better result, please read http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf
+class DiyFp
+{
+public:
+    DiyFp()
+        : m_f(0), m_e()
+    {
+    }
+
+    DiyFp(UINT64 f, int e)
+        : m_f(f), m_e(e)
+    {
+    }
+
+    DiyFp(const DiyFp& rhs)
+        : m_f(rhs.m_f), m_e(rhs.m_e)
+    {
+    }
+
+    DiyFp& operator=(const DiyFp& rhs)
+    {
+        m_f = rhs.m_f;
+        m_e = rhs.m_e;
+
+        return *this;
+    }
+
+    UINT64 f() const
+    {
+        return m_f;
+    }
+
+    int e() const
+    {
+        return m_e;
+    }
+
+    void SetSignificand(UINT64 f)
+    {
+        m_f = f;
+    }
+
+    void SetExponent(int e)
+    {
+        m_e = e;
+    }
+
+    void Minus(const DiyFp& rhs);
+    static void Minus(const DiyFp& left, const DiyFp& right, DiyFp& result);
+
+    void Multiply(const DiyFp& rhs);
+    static void Multiply(const DiyFp& left, const DiyFp& right, DiyFp& result);
+
+    static void GenerateNormalizedDiyFp(double value, DiyFp& result);
+
+public:
+    static const int SIGNIFICAND_LENGTH = 64;
+
+private:
+    // Extended significand.
+    // IEEE 754 double-precision numbers only require 53 bits significand.
+    // However, in Grisu3 we need additional 11 bits so we declare m_f as UINT64.
+    // Please note m_f does not include sign bit.
+    UINT64 m_f;
+
+    // Extended exponent.
+    // IEEE 754 double-precision numbers only require 11 bits exponent.
+    // However, in Grisu3 we need extra space to avoid overflow so we declare m_e as int.
+    // Please note m_e is a biased exponent if the DiyFp instance is generated by GenerateNormalizedDiyFp().
+    int m_e;
+};
+
+#endif
\ No newline at end of file
diff --git a/src/classlibnative/bcltype/fp.h b/src/classlibnative/bcltype/fp.h
new file mode 100644
index 0000000000..5a704db6e6
--- /dev/null
+++ b/src/classlibnative/bcltype/fp.h
@@ -0,0 +1,67 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: fp.h
+//
+
+//
+
+#ifndef _FP_H
+#define _FP_H
+
+#include <clrtypes.h>
+
+struct FPSINGLE {
+    #if BIGENDIAN
+        unsigned int sign: 1;
+        unsigned int exp: 8;
+        unsigned int mant: 23;
+    #else
+        unsigned int mant: 23;
+        unsigned int exp: 8;
+        unsigned int sign: 1;
+    #endif
+};
+    
+struct FPDOUBLE {
+    #if BIGENDIAN
+        unsigned int sign: 1;
+        unsigned int exp: 11;
+        unsigned int mantHi: 20;
+        unsigned int mantLo;
+    #else
+        unsigned int mantLo;
+        unsigned int mantHi: 20;
+        unsigned int exp: 11;
+        unsigned int sign: 1;
+    #endif
+};
+
+static void ExtractFractionAndBiasedExponent(double value, UINT64* f, int* e)
+{
+    if (((FPDOUBLE*)&value)->exp != 0)
+    {
+        // For normalized value, according to https://en.wikipedia.org/wiki/Double-precision_floating-point_format
+        // value = 1.fraction * 2^(exp - 1023)
+        //       = (1 + mantissa / 2^52) * 2^(exp - 1023)
+        //       = (2^52 + mantissa) * 2^(exp - 1023 - 52)
+        //
+        // So f = (2^52 + mantissa), e = exp - 1075;
+        *f = ((UINT64)(((FPDOUBLE*)&value)->mantHi) << 32) | ((FPDOUBLE*)&value)->mantLo + ((UINT64)1 << 52);
+        *e = ((FPDOUBLE*)&value)->exp - 1075;
+    }
+    else
+    {
+        // For denormalized value, according to https://en.wikipedia.org/wiki/Double-precision_floating-point_format
+        // value = 0.fraction * 2^(1 - 1023)
+        //       = (mantissa / 2^52) * 2^(-1022)
+        //       = mantissa * 2^(-1022 - 52)
+        //       = mantissa * 2^(-1074)
+        // So f = mantissa, e = -1074
+        *f = ((UINT64)(((FPDOUBLE*)&value)->mantHi) << 32) | ((FPDOUBLE*)&value)->mantLo;
+        *e = -1074;
+    }
+}
+
+#endif // _FP_H
\ No newline at end of file
diff --git a/src/classlibnative/bcltype/grisu3.cpp b/src/classlibnative/bcltype/grisu3.cpp
new file mode 100644
index 0000000000..0fc9e88928
--- /dev/null
+++ b/src/classlibnative/bcltype/grisu3.cpp
@@ -0,0 +1,381 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: grisu3.cpp
+//
+
+//
+
+#include "grisu3.h"
+#include <check.h>
+#include <math.h>
+
+// 1/lg(10)
+const double Grisu3::D_1_LOG2_10 = 0.30102999566398120;
+
+constexpr UINT32 Grisu3::m_cachedPowerOfTen[CACHED_POWER_OF_TEN_NUM]; 
+constexpr PowerOfTen Grisu3::m_cachedPowers[CACHED_POWER_NUM];
+
+bool Grisu3::Run(double value, int count, int* dec, int* sign, wchar_t* digits)
+{
+    // ========================================================================================================================================
+    // This implementation is based on the paper: http://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf
+    // You must read this paper to fully understand the code.
+    //
+    // Deviation: Instead of generating shortest digits, we generate the digits according to the input count.
+    // Therefore, we do not need m+ and m- which are used to determine the exact range of values.
+    // ======================================================================================================================================== 
+    //
+    // Overview:
+    //
+    // The idea of Grisu3 is to leverage additional bits and cached power of ten to produce the digits.
+    // We need to create a handmade floating point data structure DiyFp to extend the bits of double.
+    // We also need to cache the powers of ten for digits generation. By choosing the correct index of powers
+    // we need to start with, we can eliminate the expensive big num divide operation.
+    //
+    // Grisu3 is imprecision for some numbers. Fortunately, the algorithm itself can determine that and give us
+    // a success/fail flag. We may fall back to other algorithms (For instance, Dragon4) if it fails.
+    //
+    // w: the normalized DiyFp from the input value.
+    // mk: The index of the cached powers.
+    // cmk: The cached power.
+    // D: Product: w * cmk.
+    // kappa: A factor used for generating digits. See step 5 of the Grisu3 procedure in the paper.
+
+    // Handle sign bit.
+    if (value < 0)
+    {
+        value = -value;
+        *sign = 1;
+    }
+    else
+    {
+        *sign = 0;
+    }
+
+    // Step 1: Determine the normalized DiyFp w.
+
+    DiyFp w;
+    DiyFp::GenerateNormalizedDiyFp(value, w);
+
+    // Step 2: Find the cached power of ten.
+
+    // Compute the proper index mk.
+    int mk = KComp(w.e() + DiyFp::SIGNIFICAND_LENGTH);
+
+    // Retrieve the cached power of ten.
+    DiyFp cmk;
+    int decimalExponent;
+    CachedPower(mk, &cmk, &decimalExponent);
+
+    // Step 3: Scale the w with the cached power of ten.
+
+    DiyFp D;
+    DiyFp::Multiply(w, cmk, D);
+
+    // Step 4: Generate digits.
+
+    int kappa;
+    int length;
+    bool isSuccess = DigitGen(D, count, digits, &length, &kappa);
+    if (isSuccess)
+    {
+        digits[count] = 0;
+        *dec = length - decimalExponent + kappa;
+    }
+
+    return isSuccess;
+}
+
+bool Grisu3::RoundWeed(wchar_t* buffer,
+    int len,
+    UINT64 rest,
+    UINT64 tenKappa,
+    UINT64 ulp,
+    int* kappa)
+{
+    _ASSERTE(rest < tenKappa);
+
+    // 1. tenKappa <= ulp: we don't have an idea which way to round.
+    // 2. Even if tenKappa > ulp, but if tenKappa <= 2 * ulp we cannot find the way to round.
+    // Note: to prevent overflow, we need to use tenKappa - ulp <= ulp.
+    if (tenKappa <= ulp || tenKappa - ulp <=  ulp)
+    {
+        return false;
+    }
+
+    // tenKappa >= 2 * (rest + ulp). We should round down.
+    // Note: to prevent overflow, we need to check if tenKappa > 2 * rest as a prerequisite.
+    if ((tenKappa - rest > rest) && (tenKappa - 2 * rest >= 2 * ulp))
+    {
+        return true;
+    }
+
+    // tenKappa <= 2 * (rest - ulp). We should round up.
+    // Note: to prevent overflow, we need to check if rest > ulp as a prerequisite.
+    if ((rest > ulp) && (tenKappa <= (rest - ulp) || (tenKappa - (rest - ulp) <= (rest - ulp))))
+    {
+        // Find all 9s from end to start.
+        buffer[len - 1]++;
+        for (int i = len - 1; i > 0; --i)
+        {
+            if (buffer[i] != L'0' + 10)
+            {
+                // We end up a number less than 9.
+                break;
+            }
+
+            // Current number becomes 0 and add the promotion to the next number.
+            buffer[i] = L'0';
+            buffer[i - 1]++;
+        }
+
+        if (buffer[0] == L'0' + 10)
+        {
+            // First number is '0' + 10 means all numbers are 9.
+            // We simply make the first number to 1 and increase the kappa.
+            buffer[0] = L'1';
+            (*kappa) += 1;
+        }
+
+        return true;
+    }
+
+    return false;
+}
+
+bool Grisu3::DigitGen(const DiyFp& mp, int count, wchar_t* buffer, int* len, int* K)
+{
+    // Split the input mp to two parts. Part 1 is integral. Part 2 can be used to calculate
+    // fractional.
+    //
+    // mp: the input DiyFp scaled by cached power.
+    // K: final kappa.
+    // p1: part 1.
+    // p2: part 2.
+
+    _ASSERTE(count > 0);
+    _ASSERTE(buffer != NULL);
+    _ASSERTE(len != NULL);
+    _ASSERTE(K != NULL);
+    _ASSERTE(mp.e() >= ALPHA && mp.e() <= GAMA);
+
+    UINT64 ulp = 1;
+    DiyFp one = DiyFp(static_cast<UINT64>(1) << -mp.e(), mp.e());
+    UINT32 p1 = static_cast<UINT32>(mp.f() >> -one.e());
+    UINT64 p2 = mp.f() & (one.f() - 1);
+
+    // When p2 (fractional part) is zero, we can predicate if p1 is good to produce the numbers in requested digit count:
+    //
+    // - When requested digit count >= 11, p1 is not be able to exhaust the count as 10^(11 - 1) > UINT32_MAX >= p1.
+    // - When p1 < 10^(count - 1), p1 is not be able to exhaust the count.
+    // - Otherwise, p1 may have chance to exhaust the count.
+    if (p2 == 0 && (count >= 11 || p1 < m_cachedPowerOfTen[count - 1]))
+    {
+        return false;
+    }
+
+    // Note: The code in the paper simply assignes div to TEN9 and kappa to 10 directly.
+    // That means we need to check if any leading zero of the generated
+    // digits during the while loop, which hurts the performance.
+    //
+    // Now if we can estimate what the div and kappa, we do not need to check the leading zeros.
+    // The idea is to find the biggest power of 10 that is less than or equal to the given number.
+    // Then we don't need to worry about the leading zeros and we can get 10% performance gain.
+    int length = 0;
+    int kappa;
+    UINT32 div;
+    BiggestPowerTenLessThanOrEqualTo(p1, DiyFp::SIGNIFICAND_LENGTH - (-one.e()), &div, &kappa);
+    ++kappa;
+
+    // Produce integral.
+    while (kappa > 0)
+    {
+        int d = p1 / div;
+        buffer[length++] = L'0' + d;
+        --count;
+
+        p1 %= div;
+        --kappa;
+
+        if (count == 0)
+        {
+            break;
+        }
+
+        div /= 10;
+    }
+
+    // End up here if we already exhausted the digit count.
+    if (count == 0)
+    {
+        UINT64 rest = (static_cast<UINT64>(p1) << -one.e()) + p2;
+
+        *len = length;
+        *K = kappa;
+
+        return RoundWeed(buffer,
+            length,
+            rest,
+            static_cast<UINT64>(div) << -one.e(),
+            ulp,
+            K);
+    }
+
+    // We have to generate digits from part2 if we have requested digit count left
+    // and part2 is greater than ulp.
+    while (count > 0 && p2 > ulp)
+    {
+        p2 *= 10;
+
+        int d = static_cast<int>(p2 >> -one.e());
+        buffer[length++] = L'0' + d;
+        --count;
+
+        p2 &= one.f() - 1;
+        --kappa;
+
+        ulp *= 10;
+    }
+
+    // If we haven't exhausted the requested digit counts, the Grisu3 algorithm fails.
+    if (count != 0)
+    {
+        return false;
+    }
+
+    *len = length;
+    *K = kappa;
+
+    return RoundWeed(buffer, length, p2, one.f(), ulp, K);
+}
+
+int Grisu3::KComp(int e)
+{
+    return static_cast<int>(ceil((ALPHA - e + DiyFp::SIGNIFICAND_LENGTH - 1) * D_1_LOG2_10));
+}
+
+void Grisu3::CachedPower(int k, DiyFp* cmk, int* decimalExponent)
+{
+    _ASSERTE(cmk != NULL);
+    _ASSERTE(decimalExponent != NULL);
+
+    int index = (POWER_OFFSET + k - 1) / POWER_DECIMAL_EXPONENT_DISTANCE + 1;
+    PowerOfTen cachedPower = m_cachedPowers[index];
+
+    cmk->SetSignificand(cachedPower.significand);
+    cmk->SetExponent(cachedPower.binaryExponent);
+    *decimalExponent = cachedPower.decimalExponent;
+}
+
+// Returns the biggest power of ten that is less than or equal to the given number.
+void Grisu3::BiggestPowerTenLessThanOrEqualTo(UINT32 number,
+                            int bits,
+                            UINT32 *power,
+                            int *exponent)
+{
+    switch (bits)
+    {
+    case 32:
+    case 31:
+    case 30:
+        if (TEN9 <= number)
+        {
+            *power = TEN9;
+            *exponent = 9;
+            break;
+        }
+    case 29:
+    case 28:
+    case 27:
+        if (TEN8 <= number)
+        {
+            *power = TEN8;
+            *exponent = 8;
+            break;
+        }
+    case 26:
+    case 25:
+    case 24:
+        if (TEN7 <= number)
+        {
+            *power = TEN7;
+            *exponent = 7;
+            break;
+        }
+    case 23:
+    case 22:
+    case 21:
+    case 20:
+        if (TEN6 <= number)
+        {
+            *power = TEN6;
+            *exponent = 6;
+            break;
+        }
+    case 19:
+    case 18:
+    case 17:
+        if (TEN5 <= number)
+        {
+            *power = TEN5;
+            *exponent = 5;
+            break;
+        }
+    case 16:
+    case 15:
+    case 14:
+        if (TEN4 <= number)
+        {
+            *power = TEN4;
+            *exponent = 4;
+            break;
+        }
+    case 13:
+    case 12:
+    case 11:
+    case 10:
+        if (1000 <= number)
+        {
+            *power = 1000;
+            *exponent = 3;
+            break;
+        }
+    case 9:
+    case 8:
+    case 7:
+        if (100 <= number)
+        {
+            *power = 100;
+            *exponent = 2;
+            break;
+        }
+    case 6:
+    case 5:
+    case 4:
+        if (10 <= number)
+        {
+            *power = 10;
+            *exponent = 1;
+            break;
+        }
+    case 3:
+    case 2:
+    case 1:
+        if (1 <= number)
+        {
+            *power = 1;
+            *exponent = 0;
+            break;
+        }
+    case 0:
+        *power = 0;
+        *exponent = -1;
+        break;
+    default:
+        *power = 0;
+        *exponent = 0;
+        UNREACHABLE();
+    }
+}
\ No newline at end of file
diff --git a/src/classlibnative/bcltype/grisu3.h b/src/classlibnative/bcltype/grisu3.h
new file mode 100644
index 0000000000..dbbec06212
--- /dev/null
+++ b/src/classlibnative/bcltype/grisu3.h
@@ -0,0 +1,161 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: grisu3.h
+//
+
+//
+
+#ifndef _GRISU3_H
+#define _GRISU3_H
+
+#include "diyfp.h"
+
+struct PowerOfTen
+{
+    UINT64 significand;
+    INT16 binaryExponent;
+    INT16 decimalExponent;
+};
+
+class Grisu3
+{
+public:
+    static bool Run(double value, int count, int* dec, int* sign, wchar_t* digits);
+
+private:
+    Grisu3();
+    Grisu3(const Grisu3&);
+    Grisu3& operator=(const Grisu3&);
+
+    static int KComp(int e);
+    static void CachedPower(int k, DiyFp* cmk, int* decimalExponent);
+    static bool DigitGen(const DiyFp& mp, int count, wchar_t* buffer, int* len, int* k);
+    static bool RoundWeed(wchar_t* buffer, int len, UINT64 rest, UINT64 tenKappa, UINT64 ulp, int* kappa);
+    static void Grisu3::BiggestPowerTenLessThanOrEqualTo(UINT32 number, int bits, UINT32 *power, int *exponent);
+
+    // 1/lg(10)
+    static const double D_1_LOG2_10;
+
+    static const int ALPHA = -59;
+    static const int GAMA = -32;
+
+    static const UINT32 TEN4 = 10000;
+    static const UINT32 TEN5 = 100000;
+    static const UINT32 TEN6 = 1000000;
+    static const UINT32 TEN7 = 10000000;
+    static const UINT32 TEN8 = 100000000;
+    static const UINT32 TEN9 = 1000000000; 
+
+    static const int CACHED_POWER_OF_TEN_NUM = 10;
+    static constexpr UINT32 m_cachedPowerOfTen[CACHED_POWER_OF_TEN_NUM] = {
+        1,              // 10^0
+        10,             // 10^1
+        100,            // 10^2
+        1000,           // 10^3
+        10000,          // 10^4
+        100000,         // 10^5
+        1000000,        // 10^6
+        10000000,       // 10^7
+        100000000,      // 10^8
+        1000000000     // 10^9
+    };
+
+    static const int POWER_DECIMAL_EXPONENT_DISTANCE = 8;
+    static const int POWER_MIN_DECIMAL_EXPONENT = -348;
+    static const int POWER_MAX_DECIMAL_EXPONENT = 340;
+    static const int POWER_OFFSET = -POWER_MIN_DECIMAL_EXPONENT;
+    static const int CACHED_POWER_NUM = 87;
+    static constexpr PowerOfTen m_cachedPowers[CACHED_POWER_NUM] = {
+        { 0xfa8fd5a0081c0288, -1220, POWER_MIN_DECIMAL_EXPONENT },
+        { 0xbaaee17fa23ebf76, -1193, -340 },
+        { 0x8b16fb203055ac76, -1166, -332 },
+        { 0xcf42894a5dce35ea, -1140, -324 },
+        { 0x9a6bb0aa55653b2d, -1113, -316 },
+        { 0xe61acf033d1a45df, -1087, -308 },
+        { 0xab70fe17c79ac6ca, -1060, -300 },
+        { 0xff77b1fcbebcdc4f, -1034, -292 },
+        { 0xbe5691ef416bd60c, -1007, -284 },
+        { 0x8dd01fad907ffc3c, -980, -276 },
+        { 0xd3515c2831559a83, -954, -268 },
+        { 0x9d71ac8fada6c9b5, -927, -260 },
+        { 0xea9c227723ee8bcb, -901, -252 },
+        { 0xaecc49914078536d, -874, -244 },
+        { 0x823c12795db6ce57, -847, -236 },
+        { 0xc21094364dfb5637, -821, -228 },
+        { 0x9096ea6f3848984f, -794, -220 },
+        { 0xd77485cb25823ac7, -768, -212 },
+        { 0xa086cfcd97bf97f4, -741, -204 },
+        { 0xef340a98172aace5, -715, -196 },
+        { 0xb23867fb2a35b28e, -688, -188 },
+        { 0x84c8d4dfd2c63f3b, -661, -180 },
+        { 0xc5dd44271ad3cdba, -635, -172 },
+        { 0x936b9fcebb25c996, -608, -164 },
+        { 0xdbac6c247d62a584, -582, -156 },
+        { 0xa3ab66580d5fdaf6, -555, -148 },
+        { 0xf3e2f893dec3f126, -529, -140 },
+        { 0xb5b5ada8aaff80b8, -502, -132 },
+        { 0x87625f056c7c4a8b, -475, -124 },
+        { 0xc9bcff6034c13053, -449, -116 },
+        { 0x964e858c91ba2655, -422, -108 },
+        { 0xdff9772470297ebd, -396, -100 },
+        { 0xa6dfbd9fb8e5b88f, -369, -92 },
+        { 0xf8a95fcf88747d94, -343, -84 },
+        { 0xb94470938fa89bcf, -316, -76 },
+        { 0x8a08f0f8bf0f156b, -289, -68 },
+        { 0xcdb02555653131b6, -263, -60 },
+        { 0x993fe2c6d07b7fac, -236, -52 },
+        { 0xe45c10c42a2b3b06, -210, -44 },
+        { 0xaa242499697392d3, -183, -36 },
+        { 0xfd87b5f28300ca0e, -157, -28 },
+        { 0xbce5086492111aeb, -130, -20 },
+        { 0x8cbccc096f5088cc, -103, -12 },
+        { 0xd1b71758e219652c, -77, -4 },
+        { 0x9c40000000000000, -50, 4 },
+        { 0xe8d4a51000000000, -24, 12 },
+        { 0xad78ebc5ac620000, 3, 20 },
+        { 0x813f3978f8940984, 30, 28 },
+        { 0xc097ce7bc90715b3, 56, 36 },
+        { 0x8f7e32ce7bea5c70, 83, 44 },
+        { 0xd5d238a4abe98068, 109, 52 },
+        { 0x9f4f2726179a2245, 136, 60 },
+        { 0xed63a231d4c4fb27, 162, 68 },
+        { 0xb0de65388cc8ada8, 189, 76 },
+        { 0x83c7088e1aab65db, 216, 84 },
+        { 0xc45d1df942711d9a, 242, 92 },
+        { 0x924d692ca61be758, 269, 100 },
+        { 0xda01ee641a708dea, 295, 108 },
+        { 0xa26da3999aef774a, 322, 116 },
+        { 0xf209787bb47d6b85, 348, 124 },
+        { 0xb454e4a179dd1877, 375, 132 },
+        { 0x865b86925b9bc5c2, 402, 140 },
+        { 0xc83553c5c8965d3d, 428, 148 },
+        { 0x952ab45cfa97a0b3, 455, 156 },
+        { 0xde469fbd99a05fe3, 481, 164 },
+        { 0xa59bc234db398c25, 508, 172 },
+        { 0xf6c69a72a3989f5c, 534, 180 },
+        { 0xb7dcbf5354e9bece, 561, 188 },
+        { 0x88fcf317f22241e2, 588, 196 },
+        { 0xcc20ce9bd35c78a5, 614, 204 },
+        { 0x98165af37b2153df, 641, 212 },
+        { 0xe2a0b5dc971f303a, 667, 220 },
+        { 0xa8d9d1535ce3b396, 694, 228 },
+        { 0xfb9b7cd9a4a7443c, 720, 236 },
+        { 0xbb764c4ca7a44410, 747, 244 },
+        { 0x8bab8eefb6409c1a, 774, 252 },
+        { 0xd01fef10a657842c, 800, 260 },
+        { 0x9b10a4e5e9913129, 827, 268 },
+        { 0xe7109bfba19c0c9d, 853, 276 },
+        { 0xac2820d9623bf429, 880, 284 },
+        { 0x80444b5e7aa7cf85, 907, 292 },
+        { 0xbf21e44003acdd2d, 933, 300 },
+        { 0x8e679c2f5e44ff8f, 960, 308 },
+        { 0xd433179d9c8cb841, 986, 316 },
+        { 0x9e19db92b4e31ba9, 1013, 324 },
+        { 0xeb96bf6ebadf77d9, 1039, 332 },
+        { 0xaf87023b9bf0ee6b, 1066, POWER_MAX_DECIMAL_EXPONENT }
+    };
+};
+
+#endif
\ No newline at end of file
diff --git a/src/classlibnative/bcltype/number.cpp b/src/classlibnative/bcltype/number.cpp
index eb50e45928..69ee0d6313 100644
--- a/src/classlibnative/bcltype/number.cpp
+++ b/src/classlibnative/bcltype/number.cpp
@@ -13,6 +13,8 @@
 #include "string.h"
 #include "decimal.h"
 #include "bignum.h"
+#include "grisu3.h"
+#include "fp.h"
 #include <stdlib.h>
 
 typedef wchar_t wchar;
@@ -29,32 +31,6 @@ typedef wchar_t wchar;
 #define SCALE_NAN 0x80000000
 #define SCALE_INF 0x7FFFFFFF
 
-struct FPSINGLE {
-#if BIGENDIAN
-    unsigned int sign: 1;
-    unsigned int exp: 8;
-    unsigned int mant: 23;
-#else
-    unsigned int mant: 23;
-    unsigned int exp: 8;
-    unsigned int sign: 1;
-#endif
-};
-
-struct FPDOUBLE {
-#if BIGENDIAN
-    unsigned int sign: 1;
-    unsigned int exp: 11;
-    unsigned int mantHi: 20;
-    unsigned int mantLo;
-#else
-    unsigned int mantLo;
-    unsigned int mantHi: 20;
-    unsigned int exp: 11;
-    unsigned int sign: 1;
-#endif
-};
-
 
 static const char* const posCurrencyFormats[] = {
     "$#", "#$", "$ #", "# $"};
@@ -117,11 +93,7 @@ L2:             dec     ecx
 
 #else // _TARGET_X86_ && !FEATURE_PAL
 
-// Convert a double value to a NUMBER struct.
-// 
-// 1. You should ensure the input value is not infinity or NaN.
-// 2. For 0.0, number->digits will be set as an empty string. i.e the value of the first bucket is 0.
-void DoubleToNumberWorker( double value, int count, int* dec, int* sign, wchar_t* digits )
+void Dragon4( double value, int count, int* dec, int* sign, wchar_t* digits )
 {
     // ========================================================================================================================================
     // This implementation is based on the paper: https://www.cs.indiana.edu/~dyb/pubs/FP-Printing-PLDI96.pdf
@@ -142,52 +114,21 @@ void DoubleToNumberWorker( double value, int count, int* dec, int* sign, wchar_t
     // s: denominator.
     // k: value = d0.d1d2 . . . dn * 10^k
 
-    _ASSERTE(dec != nullptr && sign != nullptr && digits != nullptr);
-
-    // The caller of DoubleToNumberWorker should already checked the Infinity and NAN values.
-    _ASSERTE(((FPDOUBLE*)&value)->exp != 0x7ff);
-
-    // Shortcut for zero.
-    if (value == 0.0)
-    {
-        *dec = 0;
-        *sign = 0;
-
-        // Instead of zeroing digits, we just make it as an empty string due to performance reason.
-        *digits = 0;
-
-        return;
-    } 
-
     // Step 1:
     // Extract meta data from the input double value.
     //
     // Refer to IEEE double precision floating point format.
     UINT64 f = 0;
     int e = 0;
+    ExtractFractionAndBiasedExponent(value, &f, &e);
+
     UINT32 mantissaHighBitIdx = 0;
     if (((FPDOUBLE*)&value)->exp != 0)
     {
-        // For normalized value, according to https://en.wikipedia.org/wiki/Double-precision_floating-point_format
-        // value = 1.fraction * 2^(exp - 1023) 
-        //       = (1 + mantissa / 2^52) * 2^(exp - 1023) 
-        //       = (2^52 + mantissa) * 2^(exp - 1023 - 52)
-        //
-        // So f = (2^52 + mantissa), e = exp - 1075; 
-        f = ((UINT64)(((FPDOUBLE*)&value)->mantHi) << 32) | ((FPDOUBLE*)&value)->mantLo + ((UINT64)1 << 52);
-        e = ((FPDOUBLE*)&value)->exp - 1075;
         mantissaHighBitIdx = 52;
     }
     else
     {
-        // For denormalized value, according to https://en.wikipedia.org/wiki/Double-precision_floating-point_format
-        // value = 0.fraction * 2^(1 - 1023)
-        //       = (mantissa / 2^52) * 2^(-1022)
-        //       = mantissa * 2^(-1022 - 52)
-        //       = mantissa * 2^(-1074)
-        // So f = mantissa, e = -1074
-        f = ((UINT64)(((FPDOUBLE*)&value)->mantHi) << 32) | ((FPDOUBLE*)&value)->mantLo;
-        e = -1074;
         mantissaHighBitIdx = BigNum::LogBase2(f);
     }
 
@@ -400,6 +341,39 @@ void DoubleToNumberWorker( double value, int count, int* dec, int* sign, wchar_t
     *sign = ((FPDOUBLE*)&value)->sign;
 }
 
+// Convert a double value to a NUMBER struct.
+//
+// 1. You should ensure the input value is not infinity or NaN.
+// 2. For 0.0, number->digits will be set as an empty string. i.e the value of the first bucket is 0.
+void DoubleToNumberWorker( double value, int count, int* dec, int* sign, wchar_t* digits )
+{
+    _ASSERTE(dec != nullptr && sign != nullptr && digits != nullptr);
+
+    // The caller of DoubleToNumberWorker should already checked the Infinity and NAN values.
+    _ASSERTE(((FPDOUBLE*)&value)->exp != 0x7ff);
+
+    // Shortcut for zero.
+    if (value == 0.0)
+    {
+        *dec = 0;
+        *sign = 0;
+
+        // Instead of zeroing digits, we just make it as an empty string due to performance reason.
+        *digits = 0;
+
+        return;
+    }
+
+    // Try Grisu3 first.
+    if (Grisu3::Run(value, count, dec, sign, digits))
+    {
+        return;
+    }
+
+    // Grisu3 failed, fall back to Dragon4.
+    Dragon4(value, count, dec, sign, digits);
+}
+
 void DoubleToNumber(double value, int precision, NUMBER* number)
 {
     WRAPPER_NO_CONTRACT
-- 
cgit v1.2.3