new number parsing (#1222)

* Remove our dependency on strtod_l by bundling our own slow path. * Ok. Let us drop strtod entirely. * Trimming down the powers to -342. * Removing useless line. * Many more comments. * Adding some DLL exports. * Let the gods help those who rely on windows+gcc. * Marking the subnormals as unlikely. This is pretty much "performance neutral", but it might help just a bit with twitter.json.
2020-10-10 12:47:49 -04:00 · 2020-10-10 12:47:49 -04:00 · 37e6d1e9c7
parent 1d9926698e
commit 37e6d1e9c7
11 changed files with 1735 additions and 863 deletions
--- a/include/simdjson/common_defs.h
+++ b/include/simdjson/common_defs.h
@ -13,6 +13,12 @@ namespace internal {
 * Defined in src/to_chars
 */
 char *to_chars(char *first, const char *last, double value);
+/**
+ * @private
+ * A number parsing routine.
+ * Defined in src/from_chars
+ */
+double from_chars(const char *first) noexcept;
 }

 #ifndef SIMDJSON_EXCEPTIONS
@ -209,48 +215,4 @@ namespace std {
 #define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }


-/**
- * We may fall back on the system's number parsing, and we want
- * to be able to call a locale-insensitive number parser. It unfortunately
- * means that we need to load up locale headers.
- * The locale.h header is generally available:
- */
-#include <locale.h>
-/**
- * Determining whether we should import xlocale.h or not is 
- * a bit of a nightmare. Visual Studio and recent recent GLIBC (GCC) do not need it. 
- * However, FreeBSD and Apple platforms will need it.
- * And we would want to cover as many platforms as possible.
- */
-#ifdef __has_include
-// This is the easy case: we have __has_include and can check whether
-// xlocale is available. If so, we load it up.
-#if __has_include(<xlocale.h>)
-#include <xlocale.h>
-#endif // __has_include
-#else // We do not have __has_include
-// Here we do not have __has_include
-// We first check for __GLIBC__
-#ifdef __GLIBC__ // If we have __GLIBC__ then we should have features.h which should help.
-// Note that having __GLIBC__ does not imply that we are compiling against glibc. But
-// we hope that any platform that defines __GLIBC__ will mimick glibc.
-#include <features.h>
-// Check whether we have an old GLIBC.
-#if !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25)))
-#include <xlocale.h> // Old glibc needs xlocale, otherwise xlocale is unavailable.
-#endif // !((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ > 25)))
-#else // __GLIBC__
-// Ok. So we do not have __GLIBC__
-// We assume that everything that is not GLIBC and not on old freebsd or windows
-// needs xlocale.
-// It is likely that recent FreeBSD and Apple platforms load xlocale.h next:
-#if !(defined(_WIN32) || (__FreeBSD_version < 1000010))
-#include <xlocale.h> // Will always happen under apple.
-#endif // 
-#endif //  __GLIBC__
-#endif // __has_include
-/**
- * End of the crazy locale headers.
- */
-
 #endif // SIMDJSON_COMMON_DEFS_H
--- a/include/simdjson/generic/numberparsing.h
+++ b/include/simdjson/generic/numberparsing.h
@ -8,12 +8,7 @@ namespace {
 /// @private
 namespace numberparsing {

-using internal::FASTFLOAT_LARGEST_POWER;
-using internal::FASTFLOAT_SMALLEST_POWER;
-using internal::value128;
-using internal::power_of_ten;
-using internal::mantissa_64;
-using internal::mantissa_128;
+

 #ifdef JSON_TEST_NUMBERS
 #define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
@ -27,12 +22,25 @@ using internal::mantissa_128;
 #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE))
 #endif

+namespace {
+// Convert a mantissa, an exponent and a sign bit into an ieee64 double.
+// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable).
+// The mantissa should be in [0,1<<53). The bit at index 52, i.e., (1ULL << 52), will be zeroed.
+simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
+    double d;
+    mantissa &= ~(1ULL << 52);
+    mantissa |= real_exponent << 52;
+    mantissa |= (((uint64_t)negative) << 63);
+    memcpy(&d, &mantissa, sizeof(d));
+    return d;
+}
+}
 // Attempts to compute i * 10^(power) exactly; and if "negative" is
 // true, negate the result.
 // This function will only work in some cases, when it does not work, success is
 // set to false. This should work *most of the time* (like 99% of the time).
-// We assume that power is in the [FASTFLOAT_SMALLEST_POWER,
-// FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check.
+// We assume that power is in the [smallest_power,
+// largest_power] interval: the caller is responsible for this check.
 simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) {
  // we start with a fast path
  // It was described in
@ -61,9 +69,9 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
    // and s / p will produce correctly rounded values.
    //
    if (power < 0) {
-      d = d / power_of_ten[-power];
+      d = d / simdjson::internal::power_of_ten[-power];
    } else {
-      d = d * power_of_ten[power];
+      d = d * simdjson::internal::power_of_ten[power];
    }
    if (negative) {
      d = -d;
@ -97,16 +105,8 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
    return true;
  }

-  // We are going to need to do some 64-bit arithmetic to get a more precise product.
-  // We use a table lookup approach.
-  // It is safe because
-  // power >= FASTFLOAT_SMALLEST_POWER
-  // and power <= FASTFLOAT_LARGEST_POWER
-  // We recover the mantissa of the power, it has a leading 1. It is always
-  // rounded down.
-  uint64_t factor_mantissa = mantissa_64[power - FASTFLOAT_SMALLEST_POWER];
-  
-  // The exponent is 1024 + 63 + power 
+
+  // The exponent is 1024 + 63 + power
  //     + floor(log(5**power)/log(2)).
  // The 1024 comes from the ieee64 standard.
  // The 63 comes from the fact that we use a 64-bit word.
@ -119,61 +119,89 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
  // is equal to
  //  floor(log(5**power)/log(2)) + power
  //
-  // The 65536 is (1<<16) and corresponds to 
+  // The 65536 is (1<<16) and corresponds to
  // (65536 * power) >> 16 ---> power
  //
-  // ((152170 * power ) >> 16) is equal to 
-  // floor(log(5**power)/log(2)) 
+  // ((152170 * power ) >> 16) is equal to
+  // floor(log(5**power)/log(2))
  //
-  // Note that this is not magic: 152170/(1<<16) is 
+  // Note that this is not magic: 152170/(1<<16) is
  // approximatively equal to log(5)/log(2).
-  // The 1<<16 value is a power of two; we could use a 
+  // The 1<<16 value is a power of two; we could use a
  // larger power of 2 if we wanted to.
  //
  int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63;
-  
+

  // We want the most significant bit of i to be 1. Shift if needed.
  int lz = leading_zeroes(i);
  i <<= lz;
+
+
+  // We are going to need to do some 64-bit arithmetic to get a precise product.
+  // We use a table lookup approach.
+  // It is safe because
+  // power >= smallest_power
+  // and power <= largest_power
+  // We recover the mantissa of the power, it has a leading 1. It is always
+  // rounded down.
+  //
  // We want the most significant 64 bits of the product. We know
  // this will be non-zero because the most significant bit of i is
  // 1.
-  value128 product = jsoncharutils::full_multiplication(i, factor_mantissa);
-  uint64_t lower = product.low;
-  uint64_t upper = product.high;
+  const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); 
+  // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.)
+  //
+  // The full_multiplication function computes the 128-bit product of two 64-bit words
+  // with a returned value of type value128 with a "low component" corresponding to the
+  // 64-bit least significant bits of the product and with a "high component" corresponding
+  // to the 64-bit most significant bits of the product.
+  simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]);
+  // Both i and power_of_five_128[index] have their most significant bit set to 1 which
+  // implies that either the most or the second most significant bit of the product
+  // is 1. We pack values in this manner for efficiency reasons: it maximizes the use
+  // we make of the product. It also makes it easy to reason about the product: there
+  // is 0 or 1 leading zero in the product.

-  // We know that upper has at most one leading zero because
-  // both i and  factor_mantissa have a leading one. This means
-  // that the result is at least as large as ((1<<63)*(1<<63))/(1<<64).
-
-  // As long as the first 9 bits of "upper" are not "1", then we
-  // know that we have an exact computed value for the leading
-  // 55 bits because any imprecision would play out as a +1, in
-  // the worst case.
-  if (simdjson_unlikely((upper & 0x1FF) == 0x1FF) && (lower + i < lower)) {
-    uint64_t factor_mantissa_low =
-        mantissa_128[power - FASTFLOAT_SMALLEST_POWER];
-    // next, we compute the 64-bit x 128-bit multiplication, getting a 192-bit
-    // result (three 64-bit values)
-    product = jsoncharutils::full_multiplication(i, factor_mantissa_low);
-    uint64_t product_low = product.low;
-    uint64_t product_middle2 = product.high;
-    uint64_t product_middle1 = lower;
-    uint64_t product_high = upper;
-    uint64_t product_middle = product_middle1 + product_middle2;
-    if (product_middle < product_middle1) {
-      product_high++; // overflow carry
-    }
-    // We want to check whether mantissa *i + i would affect our result.
-    // This does happen, e.g. with 7.3177701707893310e+15.
-    if (((product_middle + 1 == 0) && ((product_high & 0x1FF) == 0x1FF) &&
-         (product_low + i < product_low))) { // let us be prudent and bail out.
+  // Unless the least significant 9 bits of the high (64-bit) part of the full
+  // product are all 1s, then we know that the most significant 55 bits are
+  // exact and no further work is needed. Having 55 bits is necessary because
+  // we need 53 bits for the mantissa but we have to have one rounding bit and
+  // we can waste a bit if the most significant bit of the product is zero.
+  if((firstproduct.high & 0x1FF) == 0x1FF) {
+    // We want to compute i * 5^q, but only care about the top 55 bits at most.
+    // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing
+    // the full computation is wasteful. So we do what is called a "truncated
+    // multiplication".
+    // We take the most significant 64-bits, and we put them in 
+    // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q
+    // to the desired approximation using one multiplication. Sometimes it does not suffice. 
+    // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and
+    // then we get a better approximation to i * 5^q. In very rare cases, even that
+    // will not suffice, though it is seemingly very hard to find such a scenario.
+    // 
+    // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat
+    // more complicated.
+    //
+    // There is an extra layer of complexity in that we need more than 55 bits of 
+    // accuracy in the round-to-even scenario.
+    //
+    // The full_multiplication function computes the 128-bit product of two 64-bit words
+    // with a returned value of type value128 with a "low component" corresponding to the
+    // 64-bit least significant bits of the product and with a "high component" corresponding
+    // to the 64-bit most significant bits of the product.
+    simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]);
+    firstproduct.low += secondproduct.high;
+    if(secondproduct.high > firstproduct.low) { firstproduct.high++; }
+    // At this point, we might need to add at most one to firstproduct, but this
+    // can only change the value of firstproduct.high if firstproduct.low is maximal.
+    if(simdjson_unlikely(firstproduct.low  == 0xFFFFFFFFFFFFFFFF)) {
+      // This is very unlikely, but if so, we need to do much more work!
      return false;
    }
-    upper = product_high;
-    lower = product_middle;
  }
+  uint64_t lower = firstproduct.low;
+  uint64_t upper = firstproduct.high;
  // The final mantissa should be 53 bits with a leading 1.
  // We shift it so that it occupies 54 bits with a leading 1.
  ///////
@ -182,32 +210,56 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
  lz += int(1 ^ upperbit);

  // Here we have mantissa < (1<<54).
-
+  int64_t real_exponent = exponent - lz;
+  if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal?
+    // Here we have that real_exponent <= 0 so -real_exponent >= 0
+    if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
+      d = 0.0;
+      return true;
+    } 
+    // next line is safe because 1 <= -real_exponent + 1 < 64
+    mantissa >>= -real_exponent + 1;
+    // Thankfully, we can't have both "round-to-even" and subnormals because
+    // "round-to-even" only occurs for powers close to 0.
+    mantissa += (mantissa & 1); // round up
+    mantissa >>= 1;
+    // There is a weird scenario where we only barely avoid being subnormal.
+    // Suppose we start with 2.2250738585072013e-308, we end up
+    // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
+    // whereas 0x40000000000000 x 2^-1023-53  is normal. Now, we need to round
+    // up 0x3fffffffffffff x 2^-1023-53  and once we do, we are no longer
+    // subnormal, but we can only know this after rounding.
+    // So we only declare a subnormal if we are smaller than the threshold.    
+    real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1;
+    d = to_double(mantissa, real_exponent, negative);
+    return true;
+  }
  // We have to round to even. The "to even" part
  // is only a problem when we are right in between two floats
  // which we guard against.
  // If we have lots of trailing zeros, we may fall right between two
  // floating-point values.
-  if (simdjson_unlikely((lower == 0) && ((upper & 0x1FF) == 0) &&
-               ((mantissa & 3) == 1))) {
-    // if mantissa & 1 == 1 we might need to round up.
-    //
-    // Scenarios:
-    // 1. We are not in the middle. Then we should round up.
-    //
-    // 2. We are right in the middle. Whether we round up depends
-    // on the last significant bit: if it is "one" then we round
-    // up (round to even) otherwise, we do not.
-    //
-    // So if the last significant bit is 1, we can safely round up.
-    // Hence we only need to bail out if (mantissa & 3) == 1.
-    // Otherwise we may need more accuracy or analysis to determine whether
-    // we are exactly between two floating-point numbers.
-    // It can be triggered with 1e23.
-    // Note: because the factor_mantissa and factor_mantissa_low are
-    // almost always rounded down (except for small positive powers),
-    // almost always should round up.
-    return false;
+  // 
+  // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54]
+  // times a power of two. That is, it is right between a number with binary significand
+  // m and another number with binary significand m+1; and it must be the case
+  // that it cannot be represented by a float itself.
+  //
+  // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p.
+  // Recall that 10^q = 5^q * 2^q.
+  // When q >= 0, we must have that (2m+1) is divisible by 5^q, so 5^q <= 2^54. We have that
+  //  5^23 <=  2^54 and it is the last power of five to qualify, so q <= 23.
+  // When q<0, we have  w  >=  (2m+1) x 5^{-q}.  We must have that w<2^{64} so
+  // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have 
+  // 2^{53} x 5^{-q} < 2^{64}.
+  // Hence we have 5^{-q} < 2^{11} or q >= -4.
+  //
+  // We require lower <= 1 and not lower == 0 because we could not prove
+  // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
+  if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) {
+    if((mantissa  << (upperbit + 64 - 53 - 2)) ==  upper) {
+      mantissa &= ~1;             // flip it so that we do not round up
+    }
  }

  mantissa += mantissa & 1;
@ -219,53 +271,29 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg
    // This will happen when parsing values such as 7.2057594037927933e+16
    ////////
    mantissa = (1ULL << 52);
-    lz--; // undo previous addition
+    real_exponent++;
  }
  mantissa &= ~(1ULL << 52);
-  uint64_t real_exponent = exponent - lz;
  // we have to check that real_exponent is in range, otherwise we bail out
-  if (simdjson_unlikely((real_exponent < 1) || (real_exponent > 2046))) {
+  if (simdjson_unlikely(real_exponent > 2046)) {
+    // We have an infinite value!!! We would throw an error here if we could.
    return false;
  }
-  mantissa |= real_exponent << 52;
-  mantissa |= (((uint64_t)negative) << 63);
-  memcpy(&d, &mantissa, sizeof(d));
+  d = to_double(mantissa, real_exponent, negative);
  return true;
 }

-static bool parse_float_strtod(const uint8_t *ptr, double *outDouble) {
-  char *endptr;
-  // We want to call strtod with the C (default) locale to avoid
-  // potential issues in case someone has a different locale.
-  // Unfortunately, Visual Studio has a different syntax.
-#ifdef _WIN32
-  static _locale_t c_locale = _create_locale(LC_ALL, "C");
-  *outDouble = _strtod_l((const char *)ptr, &endptr, c_locale);
-#else
-  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
-  *outDouble = strtod_l((const char *)ptr, &endptr, c_locale);
-#endif
-  // Some libraries will set errno = ERANGE when the value is subnormal,
-  // yet we may want to be able to parse subnormal values.
-  // However, we do not want to tolerate NAN or infinite values.
-  //
-  // Values like infinity or NaN are not allowed in the JSON specification.
-  // If you consume a large value and you map it to "infinity", you will no
-  // longer be able to serialize back a standard-compliant JSON. And there is
-  // no realistic application where you might need values so large than they
-  // can't fit in binary64. The maximal value is about  1.7976931348623157 x
-  // 10^308 It is an unimaginable large number. There will never be any piece of
-  // engineering involving as many as 10^308 parts. It is estimated that there
-  // are about 10^80 atoms in the universe.  The estimate for the total number
-  // of electrons is similar. Using a double-precision floating-point value, we
-  // can represent easily the number of atoms in the universe. We could  also
-  // represent the number of ways you can pick any three individual atoms at
-  // random in the universe. If you ever encounter a number much larger than
-  // 10^308, you know that you have a bug. RapidJSON will reject a document with
-  // a float that does not fit in binary64. JSON for Modern C++ (nlohmann/json)
-  // will flat out throw an exception.
-  //
-  if ((endptr == (const char *)ptr) || (!std::isfinite(*outDouble))) {
+// We call a fallback floating-point parser that might be slow. Note
+// it will accept JSON numbers, but the JSON spec. is more restrictive so
+// before you call parse_float_fallback, you need to have validated the input
+// string with the JSON grammar.
+// It will return an error (false) if the parsed number is infinite.
+// The string parsing itself always succeeds. We know that there is at least
+// one digit.
+static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) {
+  *outDouble = simdjson::internal::from_chars((const char *)ptr);
+  // We do not accept infinite values.
+  if (!std::isfinite(*outDouble)) {
    return false;
  }
  return true;
@ -292,7 +320,7 @@ simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
 template<typename W>
 error_code slow_float_parsing(SIMDJSON_UNUSED const uint8_t * src, W writer) {
  double d;
-  if (parse_float_strtod(src, &d)) {
+  if (parse_float_fallback(src, &d)) {
    writer.append_double(d);
    return SUCCESS;
  }
@ -346,14 +374,14 @@ simdjson_really_inline error_code parse_exponent(SIMDJSON_UNUSED const uint8_t *
  auto start_exp = p;
  int64_t exp_number = 0;
  while (parse_digit(*p, exp_number)) { ++p; }
-  // It is possible for parse_digit to overflow. 
+  // It is possible for parse_digit to overflow.
  // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN.
  // Thus we *must* check for possible overflow before we negate exp_number.

  // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into
  // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may
  // not oblige and may, in fact, generate two distinct paths in any case. It might be
-  // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off 
+  // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off
  // instructions for a simdjson_likely branch, an unconclusive gain.

  // If there were no digits, it's an error.
@ -363,7 +391,7 @@ simdjson_really_inline error_code parse_exponent(SIMDJSON_UNUSED const uint8_t *
  // We have a valid positive exponent in exp_number at this point, except that
  // it may have overflowed.

-  // If there were more than 18 digits, we may have overflowed the integer. We have to do 
+  // If there were more than 18 digits, we may have overflowed the integer. We have to do
  // something!!!!
  if (simdjson_unlikely(p > start_exp+18)) {
    // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow
@ -375,12 +403,12 @@ simdjson_really_inline error_code parse_exponent(SIMDJSON_UNUSED const uint8_t *
    // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before
    // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could
    // truncate at 324.
-    // Note that there is no reason to fail per se at this point in time. 
+    // Note that there is no reason to fail per se at this point in time.
    // E.g., 0e999999999999999999999 is a fine number.
    if (p > start_exp+18) { exp_number = 999999999999999999; }
  }
  // At this point, we know that exp_number is a sane, positive, signed integer.
-  // It is <= 999,999,999,999,999,999. As long as 'exponent' is in 
+  // It is <= 999,999,999,999,999,999. As long as 'exponent' is in
  // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent'
  // is bounded in magnitude by the size of the JSON input, we are fine in this universe.
  // To sum it up: the next line should never overflow.
@ -404,10 +432,11 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg
  // If we frequently had to deal with long strings of digits,
  // we could extend our code by using a 128-bit integer instead
  // of a 64-bit integer. However, this is uncommon in practice.
-  // digit count is off by 1 because of the decimal (assuming there was one).
  //
  // 9999999999999999999 < 2**64 so we can accomodate 19 digits.
-  if (simdjson_unlikely(digit_count-1 > 19 && significant_digits(start_digits, digit_count) > 19)) {
+  // If we have a decimal separator, then digit_count - 1 is the number of digits, but we
+  // may not have a decimal separator!
+  if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) {
    // Ok, chances are good that we had an overflow!
    // this is almost never going to get called!!!
    // we start anew, going slowly!!!
@ -427,22 +456,25 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg
  // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
  // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331
  // To future reader: we'd love if someone found a better way, or at least could explain this result!
-  if (simdjson_unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || (exponent > FASTFLOAT_LARGEST_POWER)) {
-    // this is almost never going to get called!!!
-    // we start anew, going slowly!!!
-    // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens
-    // because slow_float_parsing is a non-inlined function. If we passed our writer reference to
-    // it, it would force it to be stored in memory, preventing the compiler from picking it apart
-    // and putting into registers. i.e. if we pass it as reference, it gets slow.
-    // This is what forces the skip_double, as well.
-    error_code error = slow_float_parsing(src, writer);
-    writer.skip_double();
-    return error;
+  if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) {
+    //
+    // Important: smallest_power is such that it leads to a zero value.
+    // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero
+    // so something x 10^-343 goes to zero, but not so with  something x 10^-342.
+    static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough");
+    // 
+    if((exponent < simdjson::internal::smallest_power) || (i == 0)) {
+      WRITE_DOUBLE(0, src, writer);
+      return SUCCESS;
+    } else { // (exponent > largest_power) and (i != 0)
+      // We have, for sure, an infinite value and simdjson refuses to parse infinite values.
+      return INVALID_NUMBER(src);
+    }
  }
  double d;
  if (!compute_float_64(exponent, i, negative, d)) {
    // we are almost never going to get here.
-    if (!parse_float_strtod(src, &d)) { return INVALID_NUMBER(src); }
+    if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); }
  }
  WRITE_DOUBLE(d, src, writer);
  return SUCCESS;
@ -757,7 +789,7 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<double> parse_double(cons
    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }

    exponent += exp_neg ? 0-exp : exp;
-    overflow = overflow || exponent < FASTFLOAT_SMALLEST_POWER || exponent > FASTFLOAT_LARGEST_POWER;
+    overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;
  }

  if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
@ -769,7 +801,7 @@ SIMDJSON_UNUSED simdjson_really_inline simdjson_result<double> parse_double(cons
  if (simdjson_likely(!overflow)) {
    if (compute_float_64(exponent, i, negative, d)) { return d; }
  }
-  if (!parse_float_strtod(src-negative, &d)) {
+  if (!parse_float_fallback(src-negative, &d)) {
    return NUMBER_ERROR;
  }
  return d;
--- a/include/simdjson/internal/jsoncharutils_tables.h
+++ b/include/simdjson/internal/jsoncharutils_tables.h
@ -11,15 +11,6 @@ void found_bad_string(const uint8_t *buf);

 namespace simdjson {
 namespace internal {
-
-constexpr int FASTFLOAT_SMALLEST_POWER = -325;
-constexpr int FASTFLOAT_LARGEST_POWER = 308;
-
-struct value128 {
-  uint64_t low;
-  uint64_t high;
-};
-
 // structural chars here are
 // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL)
 // we are also interested in the four whitespace characters
--- a/include/simdjson/internal/numberparsing_tables.h
+++ b/include/simdjson/internal/numberparsing_tables.h
@ -5,22 +5,54 @@

 namespace simdjson {
 namespace internal {
+/**
+ * The smallest non-zero float (binary64) is 2^−1074.
+ * We take as input numbers of the form w x 10^q where w < 2^64.
+ * We have that w * 10^-343  <  2^(64-343) * 5^-343 < 2^-1075 (half of 2^-1074), so it rounds to zero.
+ * However, we have that 
+ * (2^64-1) * 10^-342 =  (2^64-1) * 2^-342 * 5^-342 > 2^−1074.
+ * Thus it is possible for a number of the form w * 10^-342 where 
+ * w is a 64-bit value to be a non-zero floating-point number.
+ *********
+ * Any number of form w * 10^309 where w>= 1 is going to be 
+ * infinite in binary64 so we never need to worry about powers
+ * of 5 greater than 308.
+ */
+constexpr int smallest_power = -342;
+constexpr int largest_power = 308;
+
+/**
+ * Represents a 128-bit value.
+ * low: least significant 64 bits.
+ * high: most significant 64 bits.
+ */
+struct value128 {
+  uint64_t low;
+  uint64_t high;
+};
+

 // Precomputed powers of ten from 10^0 to 10^22. These
 // can be represented exactly using the double type.
 extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[];
-// The mantissas of powers of ten from -308 to 308, extended out to sixty four
-// bits. The array contains the powers of ten approximated
-// as a 64-bit mantissa. It goes from 10^FASTFLOAT_SMALLEST_POWER to 
-// 10^FASTFLOAT_LARGEST_POWER (inclusively). 
-// The mantissa is truncated, and
-// never rounded up. Uses about 5KB.
-extern SIMDJSON_DLLIMPORTEXPORT const uint64_t mantissa_64[];
-// A complement to mantissa_64
-// complete to a 128-bit mantissa.
-// Uses about 5KB but is rarely accessed.
-extern SIMDJSON_DLLIMPORTEXPORT const uint64_t mantissa_128[];

+
+/**
+ * When mapping numbers from decimal to binary,
+ * we go from w * 10^q to m * 2^p but we have
+ * 10^q = 5^q * 2^q, so effectively
+ * we are trying to match
+ * w * 2^q * 5^q to m * 2^p. Thus the powers of two
+ * are not a concern since they can be represented
+ * exactly using the binary notation, only the powers of five
+ * affect the binary significand.
+ */ 
+
+
+// The truncated powers of five from 5^-342 all the way to 5^308
+// The mantissa is truncated to 128 bits, and
+// never rounded up. Two 64-bit words are stored per power, so it uses about 10KB.
+extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[];
 } // namespace internal
 } // namespace simdjson

--- a/src/from_chars.cpp
+++ b/src/from_chars.cpp
@ -0,0 +1,486 @@
+#include <cmath>
+#include <limits>
+namespace simdjson {
+namespace internal {
+
+/**
+ * The code in the internal::from_chars function is meant to handle the floating-point number parsing 
+ * when we have more than 19 digits in the decimal mantissa. This should only be seen
+ * in adversarial scenarios: we do not expect production systems to even produce
+ * such floating-point numbers.
+ *
+ * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/)
+ * who credits Ken Thompson for the design (via a reference to the Go source
+ * code). See
+ * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c
+ * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c
+ * It is probably not very fast but it is a fallback that should almost never be
+ * called in real life. Google Wuffs is published under the Apache License 2.0.
+ **/
+
+namespace {
+// Capacity of decimal::digits: the largest number of decimal digits kept.
+constexpr uint32_t max_digits = 768;
+// Bound on decimal::decimal_point: the shift routines and compute_float
+// treat a decimal point below -decimal_point_range as zero and one above
+// +decimal_point_range as overflow.
+constexpr int32_t decimal_point_range = 2047;
+} // namespace
+
+// Result of the decimal-to-binary conversion, in the form that from_chars()
+// assembles into a floating-point word: `mantissa` holds the explicit
+// mantissa bits and `power2` the value to be placed in the exponent field
+// (it is shifted left by mantissa_explicit_bits() and OR-ed in).
+// Both members default to zero.
+struct adjusted_mantissa {
+  uint64_t mantissa;
+  int power2;
+  adjusted_mantissa() : mantissa(0), power2(0) {}
+};
+
+// A decimal number of the form 0.d1d2...dn * 10^decimal_point, where d1..dn
+// are the first num_digits entries of `digits`. Each entry is a digit value
+// in 0..9 (not an ASCII character).
+struct decimal {
+  uint32_t num_digits;   // number of valid entries in `digits`
+  int32_t decimal_point; // power of ten applied to 0.d1d2...dn
+  bool negative;         // set by parse_decimal when the text starts with '-'
+  bool truncated;        // set when digits had to be dropped for lack of room
+  uint8_t digits[max_digits];
+};
+
+// Compile-time description of a binary floating-point format. Only the
+// specializations for double are provided below.
+template <typename T> struct binary_format {
+  // Number of mantissa bits that are explicitly stored.
+  static constexpr int mantissa_explicit_bits();
+  // Subtracted from the binary exponent to obtain the stored exponent field
+  // (see compute_float).
+  static constexpr int minimum_exponent();
+  // Value of the exponent field used to encode infinity.
+  static constexpr int infinite_power();
+  // Bit position of the sign bit in the encoded word.
+  static constexpr int sign_index();
+};
+
+template <> constexpr int binary_format<double>::mantissa_explicit_bits() {
+  return 52;
+}
+
+template <> constexpr int binary_format<double>::minimum_exponent() {
+  return -1023;
+}
+template <> constexpr int binary_format<double>::infinite_power() {
+  return 0x7FF;
+}
+
+template <> constexpr int binary_format<double>::sign_index() { return 63; }
+
+// Returns true when c is an ASCII decimal digit ('0' through '9').
+bool is_integer(char c)  noexcept  { return (c >= '0' && c <= '9'); }
+
+// Parses the decimal number starting at p into a `decimal`, advancing p past
+// the characters consumed (optional sign, integer digits, optional fraction,
+// optional exponent). No validation is performed: this should always succeed
+// since it follows a call to parse_number.
+//
+// At most max_digits - 1 significant digits are stored. Digits that do not
+// fit are still counted when positioning the decimal point, and `truncated`
+// is set only when one of the discarded digits is non-zero: discarded zeros
+// do not change the value, so they must not influence rounding.
+decimal parse_decimal(const char *&p) noexcept {
+  decimal answer;
+  answer.num_digits = 0;
+  answer.decimal_point = 0;
+  answer.truncated = false;
+  answer.negative = (*p == '-');
+  if ((*p == '-') || (*p == '+')) {
+    ++p;
+  }
+
+  // Leading zeros carry no information.
+  while (*p == '0') {
+    ++p;
+  }
+  // Number of significant digits seen so far, whether stored or not.
+  uint32_t total_digits = 0;
+  while (is_integer(*p)) {
+    if (answer.num_digits + 1 < max_digits) {
+      answer.digits[answer.num_digits++] = uint8_t(*p - '0');
+    } else if (*p != '0') {
+      answer.truncated = true;
+    }
+    ++total_digits;
+    ++p;
+  }
+  if (*p == '.') {
+    ++p;
+    const char *first_after_period = p;
+    // If no significant digit has been seen yet, the zeros that follow the
+    // period are leading zeros as well: skip them. They still move the
+    // decimal point because they are part of the fractional length below.
+    if (total_digits == 0) {
+      while (*p == '0') {
+        ++p;
+      }
+    }
+    while (is_integer(*p)) {
+      if (answer.num_digits + 1 < max_digits) {
+        answer.digits[answer.num_digits++] = uint8_t(*p - '0');
+      } else if (*p != '0') {
+        answer.truncated = true;
+      }
+      ++total_digits;
+      ++p;
+    }
+    // Minus the number of characters in the fractional part.
+    answer.decimal_point = int32_t(first_after_period - p);
+  }
+
+  if (('e' == *p) || ('E' == *p)) {
+    ++p;
+    bool neg_exp = false;
+    if ('-' == *p) {
+      neg_exp = true;
+      ++p;
+    } else if ('+' == *p) {
+      ++p;
+    }
+    int32_t exp_number = 0; // exponential part
+    while (is_integer(*p)) {
+      uint8_t digit = uint8_t(*p - '0');
+      // Saturate: once past 0x10000 the result is zero or infinity anyway,
+      // and this keeps exp_number from overflowing.
+      if (exp_number < 0x10000) {
+        exp_number = 10 * exp_number + digit;
+      }
+      ++p;
+    }
+    answer.decimal_point += (neg_exp ? -exp_number : exp_number);
+  }
+  // The point is positioned relative to every significant digit seen, not
+  // only the stored ones; using num_digits here would scale the value down
+  // by a power of ten whenever digits were dropped.
+  answer.decimal_point += int32_t(total_digits);
+  return answer;
+}
+
+namespace {
+
+// Removes all trailing zero digits from h by decreasing num_digits.
+// decimal_point is left untouched, so the represented value is unchanged.
+inline void trim(decimal &h) {
+  while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) {
+    h.num_digits--;
+  }
+}
+
+// Returns the number of decimal digits that multiplying h by 2^shift adds
+// in front of its existing digits. Only the low 6 bits of `shift` are used.
+//
+// Each 16-bit entry of the first table packs two values for a given shift:
+// the upper 5 bits are a digit count and the lower 11 bits are an offset
+// into the second table, which stores the decimal digits of 5^1, 5^2, ...
+// back to back. The leading digits of h are compared with the digits of
+// 5^shift: when h compares lower (or runs out of digits first) one digit
+// fewer is produced.
+uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) {
+  shift &= 63;
+  const static uint16_t number_of_digits_decimal_left_shift_table[65] = {
+      0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817,
+      0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067,
+      0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF,
+      0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0,
+      0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA,
+      0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC,
+      0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C,
+      0x051C, 0x051C,
+  };
+  // x_a describes this shift; x_b (the next entry) supplies the offset at
+  // which the digits of 5^shift end.
+  uint32_t x_a = number_of_digits_decimal_left_shift_table[shift];
+  uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1];
+  uint32_t num_new_digits = x_a >> 11;
+  uint32_t pow5_a = 0x7FF & x_a;
+  uint32_t pow5_b = 0x7FF & x_b;
+  const static uint8_t
+      number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = {
+          5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5,
+          3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8,
+          2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2,
+          5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1,
+          5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5,
+          3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2,
+          8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3,
+          7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5,
+          6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6,
+          0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3,
+          8, 7, 6, 9, 5, 3, 1, 2, 5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7,
+          6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2,
+          5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8,
+          6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3,
+          2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1,
+          2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6,
+          4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3,
+          2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6,
+          6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3,
+          8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5,
+          5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5,
+          7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3,
+          1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6,
+          6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6,
+          4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7,
+          2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7,
+          3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5,
+          2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5,
+          9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0,
+          2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8,
+          8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5,
+          2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4,
+          9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2,
+          0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5,
+          4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7,
+          5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9,
+          2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5,
+          6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9,
+          4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3,
+          2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8,
+          9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2,
+          3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1,
+          3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1,
+          1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3,
+          1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2,
+          3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1,
+          0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3,
+          5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1,
+          3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3,
+          9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3,
+          9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6,
+          7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3,
+          6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7,
+          6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9,
+          4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2,
+          5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9,
+          6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
+      };
+  // pow5 points at the digits of 5^shift; n is how many digits it has.
+  const uint8_t *pow5 =
+      &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a];
+  uint32_t i = 0;
+  uint32_t n = pow5_b - pow5_a;
+  // Digit-by-digit comparison of h against 5^shift, most significant first.
+  for (; i < n; i++) {
+    if (i >= h.num_digits) {
+      return num_new_digits - 1;
+    } else if (h.digits[i] == pow5[i]) {
+      continue;
+    } else if (h.digits[i] < pow5[i]) {
+      return num_new_digits - 1;
+    } else {
+      return num_new_digits;
+    }
+  }
+  return num_new_digits;
+}
+
+} // end of anonymous namespace
+
+// Rounds h to the nearest integer and returns it.
+//  - Returns 0 when h has no digits or its decimal point is negative.
+//  - Returns UINT64_MAX when the integer part has more than 18 digits.
+//  - Exact ties (a single trailing 5 right after the decimal point) are
+//    rounded to even, unless h.truncated indicates that dropped digits push
+//    the value above the tie, in which case the result is rounded up.
+uint64_t round(decimal &h) {
+  if ((h.num_digits == 0) || (h.decimal_point < 0)) {
+    return 0;
+  } else if (h.decimal_point > 18) {
+    return UINT64_MAX;
+  }
+  // at this point, we know that h.decimal_point >= 0
+  uint32_t dp = uint32_t(h.decimal_point);
+  uint64_t n = 0;
+  // Accumulate the integer part; digits past num_digits are implicit zeros.
+  for (uint32_t i = 0; i < dp; i++) {
+    n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0);
+  }
+  bool round_up = false;
+  if (dp < h.num_digits) {
+    round_up = h.digits[dp] >= 5; // normally, we round up
+    // but we may need to round to even!
+    if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) {
+      round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1]));
+    }
+  }
+  if (round_up) {
+    n++;
+  }
+  return n;
+}
+
+// Computes h * 2^shift in place (a binary left shift of the decimal value).
+// The digits are processed from least to most significant, carrying the
+// overflow in n. Digits that would land beyond max_digits are dropped, and
+// h.truncated is set when a dropped digit is non-zero.
+void decimal_left_shift(decimal &h, uint32_t shift) {
+  if (h.num_digits == 0) {
+    return;
+  }
+  // The product needs num_new_digits additional leading digits.
+  uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift);
+  int32_t read_index = int32_t(h.num_digits - 1);
+  uint32_t write_index = h.num_digits - 1 + num_new_digits;
+  uint64_t n = 0;
+
+  while (read_index >= 0) {
+    n += uint64_t(h.digits[read_index]) << shift;
+    uint64_t quotient = n / 10;
+    uint64_t remainder = n - (10 * quotient);
+    if (write_index < max_digits) {
+      h.digits[write_index] = uint8_t(remainder);
+    } else if (remainder > 0) {
+      h.truncated = true;
+    }
+    n = quotient;
+    write_index--;
+    read_index--;
+  }
+  // Flush what is left of the carry into the new leading digits.
+  while (n > 0) {
+    uint64_t quotient = n / 10;
+    uint64_t remainder = n - (10 * quotient);
+    if (write_index < max_digits) {
+      h.digits[write_index] = uint8_t(remainder);
+    } else if (remainder > 0) {
+      h.truncated = true;
+    }
+    n = quotient;
+    write_index--;
+  }
+  h.num_digits += num_new_digits;
+  if (h.num_digits > max_digits) {
+    h.num_digits = max_digits;
+  }
+  h.decimal_point += int32_t(num_new_digits);
+  trim(h);
+}
+
+// Computes h * 2^-shift in place (a binary right shift of the decimal
+// value), using long division by 2^shift from the most significant digit.
+// If the decimal point falls below -decimal_point_range, h is reset to zero.
+void decimal_right_shift(decimal &h, uint32_t shift) {
+  uint32_t read_index = 0;
+  uint32_t write_index = 0;
+
+  uint64_t n = 0;
+
+  // Consume leading digits until the running value reaches 2^shift, i.e.
+  // until the first quotient digit is non-zero.
+  while ((n >> shift) == 0) {
+    if (read_index < h.num_digits) {
+      n = (10 * n) + h.digits[read_index++];
+    } else if (n == 0) {
+      return;
+    } else {
+      // Out of digits: keep appending implicit zeros.
+      while ((n >> shift) == 0) {
+        n = 10 * n;
+        read_index++;
+      }
+      break;
+    }
+  }
+  h.decimal_point -= int32_t(read_index - 1);
+  if (h.decimal_point < -decimal_point_range) { // it is zero
+    h.num_digits = 0;
+    h.decimal_point = 0;
+    h.negative = false;
+    h.truncated = false;
+    return;
+  }
+  // mask extracts the remainder of the division by 2^shift.
+  uint64_t mask = (uint64_t(1) << shift) - 1;
+  while (read_index < h.num_digits) {
+    uint8_t new_digit = uint8_t(n >> shift);
+    n = (10 * (n & mask)) + h.digits[read_index++];
+    h.digits[write_index++] = new_digit;
+  }
+  // Emit the remaining quotient digits; those that do not fit are dropped
+  // and recorded in h.truncated when non-zero.
+  while (n > 0) {
+    uint8_t new_digit = uint8_t(n >> shift);
+    n = 10 * (n & mask);
+    if (write_index < max_digits) {
+      h.digits[write_index++] = new_digit;
+    } else if (new_digit > 0) {
+      h.truncated = true;
+    }
+  }
+  h.num_digits = write_index;
+  trim(h);
+}
+
+// Converts the decimal d (modified in place) into the mantissa and exponent
+// field of the binary format described by `binary`.
+//
+// d is repeatedly divided or multiplied by powers of two (at most 2^60 per
+// step) until it lies in [1/2, 1), with exp2 tracking the compensating binary
+// exponent. Values below the normal range are shifted right so that exp2
+// never goes under minimum_exponent + 1, then the mantissa is obtained by
+// shifting left by the mantissa width and rounding to an integer.
+//
+// Returns mantissa == 0 and power2 == 0 for zero, and mantissa == 0 and
+// power2 == binary::infinite_power() for values too large to represent.
+template <typename binary> adjusted_mantissa compute_float(decimal &d) {
+  adjusted_mantissa answer;
+  if (d.num_digits == 0) {
+    // should be zero
+    answer.power2 = 0;
+    answer.mantissa = 0;
+    return answer;
+  }
+  // At this point, going further, we can assume that d.num_digits > 0.
+  // We want to guard against excessive decimal point values because
+  // they can result in long running times. Indeed, we do
+  // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22
+  // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not
+  // fine (runs for a long time).
+  //
+  if(d.decimal_point < -324) {
+    // We have something smaller than 1e-324 which is always zero
+    // in binary64 and binary32.
+    // It should be zero.
+    answer.power2 = 0;
+    answer.mantissa = 0;
+    return answer;
+  } else if(d.decimal_point >= 310) {
+    // We have something at least as large as 0.1e310 which is
+    // always infinite.
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+    return answer;
+  }
+
+  static const uint32_t max_shift = 60;
+  static const uint32_t num_powers = 19;
+  // powers[n] is the binary shift applied when the decimal point is n
+  // digits away from zero (for n < num_powers); max_shift is used beyond.
+  static const uint8_t powers[19] = {
+      0,  3,  6,  9,  13, 16, 19, 23, 26, 29, //
+      33, 36, 39, 43, 46, 49, 53, 56, 59,     //
+  };
+  int32_t exp2 = 0;
+  // We shift right until the integer part is gone.
+  while (d.decimal_point > 0) {
+    uint32_t n = uint32_t(d.decimal_point);
+    uint32_t shift = (n < num_powers) ? powers[n] : max_shift;
+    decimal_right_shift(d, shift);
+    if (d.decimal_point < -decimal_point_range) {
+      // should be zero
+      answer.power2 = 0;
+      answer.mantissa = 0;
+      return answer;
+    }
+    exp2 += int32_t(shift);
+  }
+  // We shift left toward [1/2 ... 1].
+  while (d.decimal_point <= 0) {
+    uint32_t shift;
+    if (d.decimal_point == 0) {
+      if (d.digits[0] >= 5) {
+        break;
+      }
+      shift = (d.digits[0] < 2) ? 2 : 1;
+    } else {
+      uint32_t n = uint32_t(-d.decimal_point);
+      shift = (n < num_powers) ? powers[n] : max_shift;
+    }
+    decimal_left_shift(d, shift);
+    if (d.decimal_point > decimal_point_range) {
+      // we want to get infinity: use the exponent of the target format
+      // (a hard-coded 0xFF is only correct for binary32).
+      answer.power2 = binary::infinite_power();
+      answer.mantissa = 0;
+      return answer;
+    }
+    exp2 -= int32_t(shift);
+  }
+  // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2].
+  exp2--;
+  constexpr int32_t minimum_exponent = binary::minimum_exponent();
+  // Subnormal range: give up precision rather than go below the smallest
+  // exponent.
+  while ((minimum_exponent + 1) > exp2) {
+    uint32_t n = uint32_t((minimum_exponent + 1) - exp2);
+    if (n > max_shift) {
+      n = max_shift;
+    }
+    decimal_right_shift(d, n);
+    exp2 += int32_t(n);
+  }
+  if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+    return answer;
+  }
+
+  const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1;
+  decimal_left_shift(d, mantissa_size_in_bits);
+
+  uint64_t mantissa = round(d);
+  // It is possible that we have an overflow, in which case we need
+  // to shift back.
+  if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) {
+    decimal_right_shift(d, 1);
+    exp2 += 1;
+    mantissa = round(d);
+    if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
+      answer.power2 = binary::infinite_power();
+      answer.mantissa = 0;
+      return answer;
+    }
+  }
+  answer.power2 = exp2 - binary::minimum_exponent();
+  // Without the implicit leading bit the value is subnormal: its exponent
+  // field is one lower.
+  if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) {
+    answer.power2--;
+  }
+  // Drop the implicit leading bit.
+  answer.mantissa =
+      mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1);
+  return answer;
+}
+
+// Slow-path conversion: parses the text at `first` into a `decimal` and
+// converts it to the binary format described by `binary`. `first` is taken
+// by value, so the caller's pointer is not advanced.
+template <typename binary>
+adjusted_mantissa parse_long_mantissa(const char *first) {
+  decimal d = parse_decimal(first);
+  return compute_float<binary>(d);
+}
+
+// Parses the number starting at `first` (an optional '-' followed by a
+// decimal number) and returns it as a double. The sign is consumed here and
+// applied to the sign bit once the magnitude has been converted.
+// NOTE(review): std::memcpy is declared in <cstring>, which this file does
+// not include itself (only <cmath> and <limits>); presumably it relies on
+// the including translation unit -- confirm.
+double from_chars(const char *first) noexcept {
+  bool negative = first[0] == '-';
+  if (negative) {
+    first++;
+  }
+  adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first);
+  // Assemble the word: mantissa bits, then the exponent field, then the sign.
+  uint64_t word = am.mantissa;
+  word |= uint64_t(am.power2)
+          << binary_format<double>::mantissa_explicit_bits();
+  word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index())
+                  : word;
+  double value;
+  // Reinterpret the bits as a double without violating aliasing rules.
+  std::memcpy(&value, &word, sizeof(double));
+  return value;
+}
+
+} // internal
+} // simdjson
--- a/src/internal/numberparsing_tables.cpp
+++ b/src/internal/numberparsing_tables.cpp
--- a/src/simdjson.cpp
+++ b/src/simdjson.cpp
@ -4,6 +4,7 @@ SIMDJSON_PUSH_DISABLE_WARNINGS
 SIMDJSON_DISABLE_UNDESIRED_WARNINGS

 #include "to_chars.cpp"
+#include "from_chars.cpp"
 #include "internal/error_tables.cpp"
 #include "internal/jsoncharutils_tables.cpp"
 #include "internal/numberparsing_tables.cpp"
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -54,6 +54,7 @@ target_compile_definitions(stringparsingcheck PRIVATE NOMINMAX)

 # All remaining tests link with simdjson proper
 link_libraries(simdjson)
+add_cpp_test(random_string_number_tests LABELS acceptance per_implementation)
 add_cpp_test(basictests LABELS acceptance per_implementation)
 add_cpp_test(minify_tests LABELS acceptance per_implementation)
 add_cpp_test(document_stream_tests LABELS acceptance per_implementation)
--- a/tests/basictests.cpp
+++ b/tests/basictests.cpp
@ -16,6 +16,17 @@
 #include "cast_tester.h"
 #include "test_macros.h"

+/**
+ * Some systems have bad floating-point parsing. We want to exclude them.
+ */
+#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__) 
+// Finally, we want to exclude legacy 32-bit systems.
+#ifndef SIMDJSON_IS_32BITS
+// So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
+#define TEST_FLOATS
+#endif
+#endif
+
 const size_t AMAZON_CELLPHONES_NDJSON_DOC_COUNT = 793;
 #define SIMDJSON_SHOW_DEFINE(x) printf("%s=%s\n", #x, STRINGIFY(x))

@ -34,6 +45,34 @@ namespace number_tests {
    return ua + ub + 0x80000000;
  }

+  // Parses a handful of hard cases and checks that each one compares equal
+  // to the hexadecimal floating-point literal it is known to map to.
+  // Returns false on the first mismatch.
+  bool ground_truth() {
+    std::cout << __func__ << std::endl;
+    const std::pair<std::string,double> ground_truth[] = {
+      {"2.2250738585072013e-308",0x1p-1022},
+      {"-92666518056446206563E3", -0x1.39f764644154dp+76},
+      {"-42823146028335318693e-128", -0x1.0176daa6cdaafp-360},
+      {"90054602635948575728E72", 0x1.61ab4ea9cb6c3p+305},
+      {"1.00000000000000188558920870223463870174566020691753515394643550663070558368373221972569761144603605635692374830246134201063722058e-309", 0x0.0b8157268fdafp-1022},
+      {"0e9999999999999999999999999999", 0x0p+0},
+      {"-2402844368454405395.2", -0x1.0ac4f1c7422e7p+61}
+    };
+    simdjson::dom::parser parser;
+    // Iterate by reference: copying a pair<string,double> per case is wasteful.
+    for(const auto &string_double : ground_truth) {
+        std::cout << "parsing the string '" << string_double.first << "'" << std::endl;
+        std::cout << "I am expecting the floating-point value '" << string_double.second << "'" << std::endl;
+        double result;
+        ASSERT_SUCCESS(parser.parse(string_double.first).get(result));
+        std::cout << "Resulting float is '" << result << "'" << std::endl;
+        if(result != string_double.second) {
+          std::cerr << std::hexfloat << result << " vs " << string_double.second << std::endl;
+          std::cerr << string_double.first << std::endl;
+          return false;
+        }
+    }
+    return true;
+  }
+

  bool small_integers() {
    std::cout << __func__ << std::endl;
@ -56,6 +95,7 @@ namespace number_tests {
    std::cout << __func__ << std::endl;
    simdjson::dom::parser parser;
    std::vector<std::pair<std::string, double>> testing = {
+      {"9999999999999999999e0",9999999999999999999.0},
      {"9999999999999999999.0",9999999999999999999.0},
      {"9999999999999999999",9999999999999999999.},
      {"999999999999999999.9",999999999999999999.9},
@ -99,7 +139,6 @@ namespace number_tests {
      double expected = pow(2, i);
      size_t n = snprintf(buf, sizeof(buf), "%.*e", std::numeric_limits<double>::max_digits10 - 1, expected);
      if (n >= sizeof(buf)) { abort(); }
-      fflush(NULL);
      double actual;
      auto error = parser.parse(buf, n).get(actual);
      if (error) { std::cerr << error << std::endl; return false; }
@ -194,14 +233,13 @@ namespace number_tests {
    simdjson::dom::parser parser;

    bool is_pow_correct{1e-308 == std::pow(10,-308)};
-    int start_point = is_pow_correct ? -10000 : -307;
+    int start_point = is_pow_correct ? -1000 : -307;
    if(!is_pow_correct) {
      std::cout << "On your system, the pow function is busted. Sorry about that. " << std::endl;
    }
    for (int i = start_point; i <= 308; ++i) {// large negative values should be zero.
      size_t n = snprintf(buf, sizeof(buf), "1e%d", i);
      if (n >= sizeof(buf)) { abort(); }
-      fflush(NULL);
      double actual;
      auto error = parser.parse(buf, n).get(actual);
      if (error) { std::cerr << error << std::endl; return false; }
@ -217,8 +255,69 @@ namespace number_tests {
    printf("Powers of 10 can be parsed.\n");
    return true;
  }
+
+  // Parses `vals` with simdjson and checks that the result equals `val`.
+  // The runtime library's strtod (C locale) is used as a second opinion: on
+  // a mismatch with `val`, the result is still accepted when the runtime
+  // library successfully parsed the string and agrees with simdjson.
+  // Returns true when the test passes.
+  bool basic_test_64bit(std::string vals, double val) {
+    std::cout << " parsing "  << vals << std::endl;
+    double std_answer;
+    char *endptr;
+    // We want to call strtod with the C (default) locale to avoid
+    // potential issues in case someone has a different locale.
+    // Unfortunately, Visual Studio has a different syntax.
+    const char * cval = vals.c_str();
+#ifdef _WIN32
+    static _locale_t c_locale = _create_locale(LC_ALL, "C");
+    std_answer = _strtod_l(cval, &endptr, c_locale);
+#else
+    static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
+    std_answer = strtod_l(cval, &endptr, c_locale);
+#endif
+    // When nothing was consumed, std_answer is not a meaningful reference.
+    const bool runtime_parsed = (endptr != cval);
+    if(!runtime_parsed) {
+      std::cerr << "Your runtime library failed to parse " << vals << std::endl;
+    }
+    double actual;
+    simdjson::dom::parser parser;
+    auto error = parser.parse(vals).get(actual);
+    if(error) {
+      std::cerr << error << std::endl;
+      return false;
+    }
+    if (actual != val) {
+      std::cerr  << std::hexfloat << actual << " but I was expecting " << val
+              << std::endl;
+      // std::dec only affects integers; std::defaultfloat undoes hexfloat.
+      std::cerr << std::defaultfloat;
+      std::cerr << "string: " << vals << std::endl;
+      if(runtime_parsed && (std_answer == actual)) {
+        std::cerr << "simdjson agrees with your runtime library, so we will accept the answer." << std::endl;
+        return true;
+      }
+      return false;
+    }
+    std::cout << std::hexfloat << actual << " == " << val << std::endl;
+    std::cout << std::defaultfloat;
+    return true;
+  }
+
+  bool specific_tests() {
+    std::cout << __func__ << std::endl;
+    return basic_test_64bit("-2402844368454405395.2",-2402844368454405395.2) &&  
+           basic_test_64bit("4503599627370496.5", 4503599627370496.5) &&
+           basic_test_64bit("4503599627475352.5", 4503599627475352.5) &&
+           basic_test_64bit("4503599627475353.5", 4503599627475353.5) &&
+           basic_test_64bit("2251799813685248.25", 2251799813685248.25) &&
+           basic_test_64bit("1125899906842624.125", 1125899906842624.125) &&
+           basic_test_64bit("1125899906842901.875", 1125899906842901.875) &&
+           basic_test_64bit("2251799813685803.75", 2251799813685803.75) &&
+           basic_test_64bit("4503599627370497.5", 4503599627370497.5) &&
+           basic_test_64bit("45035996.273704995", 45035996.273704995) &&
+           basic_test_64bit("45035996.273704985", 45035996.273704985) &&
+           basic_test_64bit("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000044501477170144022721148195934182639518696390927032912960468522194496444440421538910330590478162701758282983178260792422137401728773891892910553144148156412434867599762821265346585071045737627442980259622449029037796981144446145705102663115100318287949527959668236039986479250965780342141637013812613333119898765515451440315261253813266652951306000184917766328660755595837392240989947807556594098101021612198814605258742579179000071675999344145086087205681577915435923018910334964869420614052182892431445797605163650903606514140377217442262561590244668525767372446430075513332450079650686719491377688478005309963967709758965844137894433796621993967316936280457084866613206797017728916080020698679408551343728867675409720757232455434770912461317493580281734466552734375", 
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000044501477170144022721148195934182639518696390927032912960468522194496444440421538910330590478162701758282983178260792422137401728773891892910553144148156412434867599762821265346585071045737627442980259622449029037796981144446145705102663115100318287949527959668236039986479250965780342141637013812613333119898765515451440315261253813266652951306000184917766328660755595837392240989947807556594098101021612198814605258742579179000071675999344145086087205681577915435923018910334964869420614052182892431445797605163650903606514140377217442262561590244668525767372446430075513332450079650686719491377688478005309963967709758965844137894433796621993967316936280457084866613206797017728916080020698679408551343728867675409720757232455434770912461317493580281734466552734375) &&
+           basic_test_64bit("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375", 
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375);
+  }
+
  bool run() {
-    return small_integers() &&
+    return specific_tests() &&
+           ground_truth() &&
+           small_integers() &&
           powers_of_two() &&
           powers_of_ten() &&
           nines();
@ -1189,7 +1288,14 @@ namespace type_tests {
      && (expected_value >= 0 ?
          test_cast<uint64_t>(result, expected_value) :
          test_cast_error<uint64_t>(result, NUMBER_OUT_OF_RANGE))
+#ifdef TEST_FLOATS
+      // We trust the underlying system to be accurate.
      && test_cast<double>(result, static_cast<double>(expected_value))
+#else
+      // We don't trust the underlying system so we only run the test_cast
+      // exact test when the expected_value is within the 53-bit range.
+      && ((expected_value<-9007199254740992) || (expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value))) 
+#endif
      && test_cast_error<bool>(result, INCORRECT_TYPE)
      && test_is_null(result, false);
  }
@ -1209,6 +1315,14 @@ namespace type_tests {
      && test_cast_error<int64_t>(result, NUMBER_OUT_OF_RANGE)
      && test_cast<uint64_t>(result, expected_value)
      && test_cast<double>(result, static_cast<double>(expected_value))
+#ifdef TEST_FLOATS
+      // We trust the underlying system to be accurate.
+      && test_cast<double>(result, static_cast<double>(expected_value))
+#else
+      // We don't trust the underlying system so we only run the test_cast
+      // exact test when the expected_value is within the 53-bit range.
+      && ((expected_value>9007199254740992) || test_cast<double>(result, static_cast<double>(expected_value))) 
+#endif
      && test_cast_error<bool>(result, INCORRECT_TYPE)
      && test_is_null(result, false);
  }
@ -1409,7 +1523,7 @@ namespace minify_tests {
      auto e = simdjson::minify(bogus_json.get(), i, output_json.get(), newlength);
      if(e) {
        std::cerr << "got an error (unexpected) : " << e << std::endl;
-        return false;       
+        return false;
      }
    }
    return true;
@ -1427,7 +1541,7 @@ namespace minify_tests {
      auto e = simdjson::minify(bogus_json.get(), i, output_json.get(), newlength);
      if(e) {
        std::cerr << "got an error (unexpected) : " << e << std::endl;
-        return false;       
+        return false;
      }
    }
    return true;
@ -1899,7 +2013,7 @@ int main(int argc, char *argv[]) {
      dom_api_tests::run() &&
      type_tests::run() &&
      format_tests::run() &&
-      number_tests::run() 
+      number_tests::run()
  ) {
    std::cout << "Basic tests are ok." << std::endl;
    return EXIT_SUCCESS;
--- a/tests/numberparsingcheck.cpp
+++ b/tests/numberparsingcheck.cpp
@ -1,9 +1,10 @@
 #include <cstring>
-#include <inttypes.h>
-#include <math.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cinttypes>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+
+

 #ifndef JSON_TEST_NUMBERS
 #define JSON_TEST_NUMBERS
@ -22,6 +23,29 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf);

 #include "simdjson.h"

+
+
+/**
+ * Some systems have bad floating-point parsing. We want to exclude them.
+ */
+#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__) 
+// Ok. So under Visual Studio, linux, apple and freebsd systems, we have a good chance of having a decent
+// enough strtod. This is not guaranteed, but it is probably a good enough heuristic. We exclude systems
+// like msys2 or cygwin.
+//
+// Finally, we want to exclude legacy 32-bit systems.
+#ifndef SIMDJSON_IS_32BITS
+// So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
+#define TEST_FLOATS
+// Apple and freebsd need a special header, typically.
+#if defined __APPLE__ || defined(__FreeBSD__)
+#  include <xlocale.h> 
+#endif
+
+#endif
+
+#endif
+
 // ulp distance
 // Marc B. Reynolds, 2016-2019
 // Public Domain under http://unlicense.org, see link for details.
@ -73,6 +97,10 @@ bool is_in_bad_list(const char *buf) {
  return false;
 }

+#ifndef TEST_FLOATS
+// No-op stub compiled when TEST_FLOATS is not defined: we do not recognize the system, so rejected numbers are not cross-checked against strtod.
+void found_invalid_number(const uint8_t *) {}
+#else
 void found_invalid_number(const uint8_t *buf) {
  invalid_count++;
  char *endptr;
@ -82,7 +110,7 @@ void found_invalid_number(const uint8_t *buf) {
 #else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  double expected = strtod_l((const char *)buf, &endptr, c_locale);
-#endif
+#endif     
  if (endptr != (const char *)buf) {
    if (!is_in_bad_list((const char *)buf)) {
      printf("Warning: found_invalid_number %.32s whereas strtod parses it to "
@ -93,6 +121,7 @@ void found_invalid_number(const uint8_t *buf) {
    }
  }
 }
+#endif

 void found_integer(int64_t result, const uint8_t *buf) {
  int_count++;
@ -101,7 +130,7 @@ void found_integer(int64_t result, const uint8_t *buf) {
  if ((endptr == (const char *)buf) || (expected != result)) {
 #if (!(__MINGW32__) && !(__MINGW64__))
    fprintf(stderr, "Error: parsed %" PRId64 " out of %.32s, ", result, buf);
-#else // mingw is busted since we include #include <inttypes.h>
+#else // mingw is busted since we include #include <inttypes.h> and it will still  not provide PRId64
    fprintf(stderr, "Error: parsed %lld out of %.32s, ", (long long)result, buf);
 #endif
    fprintf(stderr, " while parsing %s \n", fullpath);
@ -124,6 +153,10 @@ void found_unsigned_integer(uint64_t result, const uint8_t *buf) {
  }
 }

+#ifndef TEST_FLOATS
+// No-op stub compiled when TEST_FLOATS is not defined: we do not recognize the system, so parsed floats are not cross-checked against strtod.
+void found_float(double , const uint8_t *) {}
+#else
 void found_float(double result, const uint8_t *buf) {
  char *endptr;
  float_count++;
@ -133,7 +166,7 @@ void found_float(double result, const uint8_t *buf) {
 #else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  double expected = strtod_l((const char *)buf, &endptr, c_locale);
-#endif  
+#endif    
  if (endptr == (const char *)buf) {
    fprintf(stderr,
            "parsed %f from %.32s whereas strtod refuses to parse a float, ",
@ -141,7 +174,7 @@ void found_float(double result, const uint8_t *buf) {
    fprintf(stderr, " while parsing %s \n", fullpath);
    parse_error |= PARSE_ERROR;
  }
-  if (fpclassify(expected) != fpclassify(result)) {
+  if (std::fpclassify(expected) != std::fpclassify(result)) {
    fprintf(stderr,
            "floats not in the same category expected: %f observed: %f \n",
            expected, result);
@ -158,6 +191,7 @@ void found_float(double result, const uint8_t *buf) {
    parse_error |= PARSE_ERROR;
  }
 }
+#endif

 #include "simdjson.h"
 #include "simdjson.cpp"
--- a/tests/random_string_number_tests.cpp
+++ b/tests/random_string_number_tests.cpp
@ -0,0 +1,197 @@
+#include <cstring>
+#include <cinttypes>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstdint>
+#include <random>
+#include <climits>
+#include <unistd.h>
+
+#include "simdjson.h"
+
+
+/**
+ * Some systems have bad floating-point parsing. We want to exclude them.
+ */
+#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined (__linux__) || defined (__APPLE__) || defined(__FreeBSD__) 
+// Ok. So under Visual Studio, linux, apple and freebsd systems, we have a good chance of having a decent
+// enough strtod. This is not guaranteed, but it is probably a good enough heuristic. We exclude systems
+// like msys2 or cygwin.
+//
+// Finally, we want to exclude legacy 32-bit systems.
+#ifndef SIMDJSON_IS_32BITS
+// So we only run some of the floating-point tests under 64-bit linux, apple, regular visual studio, freebsd.
+#define TEST_FLOATS
+// Apple and freebsd need a special header, typically.
+#if defined __APPLE__ || defined(__FreeBSD__)
+#  include <xlocale.h> 
+#endif
+
+#endif
+
+#endif
+
+
+/**
+ * A seeded Mersenne Twister bundled with the integer distributions used to
+ * assemble random number strings. Each next_* helper draws one value from
+ * the shared generator.
+ */
+struct RandomEngine {
+   // A seed is mandatory: there is no default constructor.
+   RandomEngine() = delete;
+   RandomEngine(uint32_t seed) : generator(seed) {}
+
+   // Draws 0 or 1; a nonzero draw converts to true.
+   bool next_bool() { return one_zero_generator(generator); }
+   // Draws a digit in [0,9].
+   int next_digit() { return digit_generator(generator); }
+   // Draws a digit in [1,9].
+   int next_nonzero_digit() { return nonzero_digit_generator(generator); }
+   // Draws a digit count in [1,40].
+   int next_digit_count() { return digit_count_generator(generator); }
+   // Draws an exponent digit count in [1,3].
+   int next_exp_count() { return exp_count_generator(generator); }
+
+   std::uniform_int_distribution<int> one_zero_generator{0, 1};
+   std::uniform_int_distribution<int> digit_generator{0, 9};
+   std::uniform_int_distribution<int> nonzero_digit_generator{1, 9};
+
+   std::uniform_int_distribution<int> digit_count_generator{1, 40};
+   std::uniform_int_distribution<int> exp_count_generator{1, 3};
+
+   std::mt19937 generator;
+};
+
+/**
+ * Writes a random, null-terminated decimal number into `buffer`: an optional
+ * minus sign, rand.next_digit_count() digits with at most one decimal point,
+ * and optionally an exponent ('e' or 'E', optional sign, then
+ * rand.next_exp_count() digits).
+ * Returns the number of characters written, not counting the terminator.
+ * The caller must provide a buffer large enough for the whole string.
+ */
+size_t build_random_string(RandomEngine &rand, char *buffer) {
+  char *cursor = buffer;
+  if (rand.next_bool()) { *cursor++ = '-'; }
+
+  const size_t digit_total = size_t(rand.next_digit_count());
+  // When the drawn position equals digit_total, no separator is emitted and
+  // the mantissa is an integer.
+  std::uniform_int_distribution<int> separator_picker(1, int(digit_total));
+  const size_t separator_at = size_t(separator_picker(rand.generator));
+  for (size_t idx = 0; idx != digit_total; ++idx) {
+    if (idx == separator_at) { *cursor++ = '.'; }
+    // A leading zero is only allowed when it is directly followed by the
+    // separator position (i.e. "0.xyz" style numbers).
+    const bool must_be_nonzero = (idx == 0) && (separator_at != 1);
+    const int digit = must_be_nonzero ? rand.next_nonzero_digit() : rand.next_digit();
+    *cursor++ = char(digit + '0');
+  }
+
+  const bool with_exponent = rand.next_bool();
+  if (with_exponent) {
+    *cursor++ = rand.next_bool() ? 'e' : 'E';
+    if (rand.next_bool()) {
+      *cursor++ = '-';
+    } else if (rand.next_bool()) {
+      *cursor++ = '+';
+    }
+    const size_t exponent_total = size_t(rand.next_exp_count());
+    for (size_t idx = 0; idx != exponent_total; ++idx) {
+      // The first exponent digit is never zero.
+      const int digit = (idx == 0) ? rand.next_nonzero_digit() : rand.next_digit();
+      *cursor++ = char(digit + '0');
+    }
+  }
+
+  *cursor = '\0'; // null termination
+  return size_t(cursor - buffer);
+}
+
+
+#ifndef TEST_FLOATS
+// We do not recognize the system, so we do not verify our results.
+bool check_float(double , const char *) {
+    return true;
+}
+#else
+/**
+ * Compares `result` with what the system's locale-independent strtod
+ * produces for the null-terminated text `buf`.
+ * Returns true when both values compare equal. Otherwise (or when strtod
+ * cannot parse anything from `buf`) a diagnostic is written to stderr and
+ * false is returned.
+ */
+bool check_float(double result, const char *buf) {
+  char *endptr;
+#ifdef _WIN32
+  static _locale_t c_locale = _create_locale(LC_ALL, "C");
+  double expected = _strtod_l(buf, &endptr, c_locale);
+#else
+  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
+  double expected = strtod_l(buf, &endptr, c_locale);
+#endif
+  // The input is printed in full: generated strings can exceed 32 characters
+  // and a truncated input cannot be used to reproduce the failure.
+  if (endptr == buf) {
+    fprintf(stderr,
+            "parsed %f from %s whereas strtod refuses to parse a float\n",
+            result, buf);
+    return false;
+  }
+  if (expected != result) {
+    fprintf(stderr, "parsed %.128e from \n", result);
+    fprintf(stderr, "       %s whereas strtod gives\n", buf);
+    fprintf(stderr, "       %.128e\n", expected);
+    return false;
+  }
+  return true;
+}
+#endif
+
+
+/**
+ * Builds `volume` random number strings from a generator seeded with `seed`,
+ * parses each of them as a double, and cross-checks every successfully
+ * parsed value with check_float.
+ * Returns false at the first mismatch and true otherwise.
+ */
+bool tester(int seed, size_t volume) {
+  simdjson::dom::parser parser;
+  RandomEngine rand(seed);
+  char buffer[1024]; // large buffer (can't overflow)
+  double result;
+  size_t produced = 0;
+  while (produced < volume) {
+    // Print a progress dot every 100000 strings.
+    if ((produced % 100000) == 0) { std::cout << "."; std::cout.flush(); }
+    produced++;
+    const size_t length = build_random_string(rand, buffer);
+    const auto error = parser.parse(buffer, length).get(result);
+    // Strings that fail to parse are skipped rather than compared.
+    if (error) { continue; }
+    // When we parse a (finite) number, it better match strtod.
+    if (!check_float(result, buffer)) { return false; }
+  }
+  return true;
+}
+
+/**
+ * Command-line entry point.
+ *   -a <name>  select a simdjson implementation
+ *   -m <count> number of random strings to test (default 1,000,000)
+ *   -h         print usage and exit
+ * Returns EXIT_SUCCESS when every test passes, EXIT_FAILURE otherwise
+ * (including on invalid command-line arguments).
+ */
+int main(int argc, char *argv[]) {
+  // We test 1,000,000 random strings by default.
+  // You can specify more tests with the '-m' flag if you want.
+  size_t howmany = 1000000;
+
+  int c;
+  while ((c = getopt(argc, argv, "a:m:h")) != -1) {
+    switch (c) {
+    case 'a': {
+      const simdjson::implementation *impl = simdjson::available_implementations[optarg];
+      if (!impl) {
+        fprintf(stderr, "Unsupported architecture value -a %s\n", optarg);
+        return EXIT_FAILURE;
+      }
+      if(!impl->supported_by_runtime_system()) {
+        fprintf(stderr, "The selected implementation does not match your current CPU: -a %s\n", optarg);
+        return EXIT_FAILURE;
+      }
+      simdjson::active_implementation = impl;
+      break;
+    }
+    case 'h': {
+      std::cout << "-a to select an architecture" << std::endl;
+      std::cout << "-m to select a number of tests" << std::endl;
+      return EXIT_SUCCESS;
+    }
+    case 'm': {
+      // strtoll (unlike atoll) lets us reject empty input and trailing junk
+      // such as "12abc" instead of silently accepting a prefix.
+      char *end = nullptr;
+      long long requested_howmany = std::strtoll(optarg, &end, 10);
+      if((end == optarg) || (*end != '\0') || (requested_howmany <= 0)) {
+        fprintf(stderr, "Please provide a positive number of tests -m %s no larger than %lld \n", optarg, LLONG_MAX);
+        return EXIT_FAILURE;
+      }
+      howmany = size_t(requested_howmany);
+      break;
+    }
+    default:
+      // getopt returns '?' for any bad option; the offending option
+      // character itself is stored in optopt.
+      fprintf(stderr, "Unexpected argument %c\n", optopt);
+      return EXIT_FAILURE;
+    }
+  }
+  if (tester(1234344, howmany)) {
+    std::cout << "All tests ok." << std::endl;
+    return EXIT_SUCCESS;
+  }
+  std::cout << "Failure." << std::endl;
+  return EXIT_FAILURE;
+}