Faster and more correct serialization (#1168)

* Adding new files. * Better. * Fixing minifier and adding tests. * Adding benchmarks. * Including the array header. * Replacing old stream-based code by the new code. * Doubling up the itoa. * Hidden away to_chars in internal namespace. * Removing the repetitions. * Documented the atoi functions. * Tuning the escape sequences. * Moving the operators off the main namespace. * Added more tests. * Tweaking the implementation so that it works with and without exp. * The string_builder template and mini_formatter class are not part of our public API and are subject to change at any time! * Adding a benchmark and some optimization. * Cleaning. * Strictly speaking, this header is needed.
2020-09-23 10:00:39 -04:00 · 2020-09-23 10:00:39 -04:00 · 60c139a844
parent f410213003
commit 60c139a844
21 changed files with 1986 additions and 384 deletions
--- a/benchmark/bench_dom_api.cpp
+++ b/benchmark/bench_dom_api.cpp
@ -63,16 +63,136 @@ static void serialize_twitter(State& state) {
    bytes += serial.size();
    benchmark::DoNotOptimize(serial);
  }
  // we validate the result
  {
    auto serial = simdjson::minify(doc);
    dom::element doc2; // we parse the minified output
    if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
    auto serial2 = simdjson::minify(doc2); // we minify a second time
    if(serial != serial2) { throw std::runtime_error("serialization mismatch"); }
  }
  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
 	        double(bytes), benchmark::Counter::kIsRate,
 	        benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
 }
-BENCHMARK(serialize_twitter)->Repetitions(10)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+BENCHMARK(serialize_twitter)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
    return *(std::max_element(std::begin(v), std::end(v)));
  })->DisplayAggregatesOnly(true);
 // Benchmark: serialize (simdjson::to_string) a document that consists of a
 // single JSON string made of 100000 digit characters. Throughput is reported
 // through the "Gigabytes" and "docs" rate counters.
 static void serialize_big_string_to_string(State& state) {
  dom::parser parser;
  // Build the JSON text: an opening quote, 100000 digits, a closing quote.
  std::vector<char> content;
  content.push_back('\"');
  for(size_t i = 0 ; i < 100000; i ++) {
    content.push_back(char('0' + i % 10)); // we add what looks like a long list of digits
  }
  content.push_back('\"');
  dom::element doc;
  simdjson::error_code error;
  if ((error = parser.parse(content.data(), content.size()).get(doc))) {
    cerr << "could not parse big string" << error << endl;
    // Flag the run as failed: leaving a benchmark function without ever
    // iterating over `state` and without reporting an error is not a valid
    // way to bail out.
    state.SkipWithError("could not parse big string");
    return;
  }
  size_t bytes = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    auto serial = simdjson::to_string(doc);
    bytes += serial.size();
    benchmark::DoNotOptimize(serial);
  }
  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
      double(bytes), benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
 }
 BENCHMARK(serialize_big_string_to_string)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
    return *(std::max_element(std::begin(v), std::end(v)));
  })->DisplayAggregatesOnly(true);
 // Benchmark: serialize the parsed twitter.json document with
 // simdjson::to_string. After the timed loop, the output is re-parsed and
 // re-serialized once to check that serialization round-trips.
 // Throughput is reported through the "Gigabytes" and "docs" rate counters.
 static void serialize_twitter_to_string(State& state) {
  dom::parser parser;
  padded_string docdata;
  auto error = padded_string::load(TWITTER_JSON).get(docdata);
  if(error) {
      cerr << "could not load twitter.json" << error << endl;
      // Every early exit flags the run as failed so that the framework does
      // not see a benchmark that returned without iterating over `state`.
      state.SkipWithError("could not load twitter.json");
      return;
  }
  // we do not want mem. alloc. in the loop.
  if((error = parser.allocate(docdata.size()))) {
      cerr << error << endl;
      state.SkipWithError("could not allocate the parser");
      return;
  }
  dom::element doc;
  if ((error = parser.parse(docdata).get(doc))) {
    cerr << "could not parse twitter.json" << error << endl;
    state.SkipWithError("could not parse twitter.json");
    return;
  }
  size_t bytes = 0;
  for (SIMDJSON_UNUSED auto _ : state) {
    auto serial = simdjson::to_string(doc);
    bytes += serial.size();
    benchmark::DoNotOptimize(serial);
  }
  // we validate the result
  {
    auto serial = simdjson::to_string(doc);
    dom::element doc2; // we parse the stringify output
    if ((error = parser.parse(serial).get(doc2))) { throw std::runtime_error("serialization error"); }
    auto serial2 = simdjson::to_string(doc2); // we stringify again
    if(serial != serial2) { throw std::runtime_error("serialization mismatch"); }
  }
  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
      double(bytes), benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
 }
 BENCHMARK(serialize_twitter_to_string)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
    return *(std::max_element(std::begin(v), std::end(v)));
  })->DisplayAggregatesOnly(true);
 // Benchmark: serialize the parsed twitter.json document through a reused
 // simdjson::internal::string_builder (cleared at the start of every
 // iteration). Throughput is reported through the "Gigabytes" and "docs"
 // rate counters.
 static void serialize_twitter_string_builder(State& state) {
  dom::parser parser;
  padded_string docdata;
  auto error = padded_string::load(TWITTER_JSON).get(docdata);
  if(error) {
      cerr << "could not load twitter.json" << error << endl;
      // Every early exit flags the run as failed so that the framework does
      // not see a benchmark that returned without iterating over `state`.
      state.SkipWithError("could not load twitter.json");
      return;
  }
  // we do not want mem. alloc. in the loop.
  if((error = parser.allocate(docdata.size()))) {
      cerr << error << endl;
      state.SkipWithError("could not allocate the parser");
      return;
  }
  dom::element doc;
  if ((error = parser.parse(docdata).get(doc))) {
    cerr << "could not parse twitter.json" << error << endl;
    state.SkipWithError("could not parse twitter.json");
    return;
  }
  size_t bytes = 0;
  simdjson::internal::string_builder<> sb;// not part of our public API, for internal use
  for (SIMDJSON_UNUSED auto _ : state) {
    sb.clear();
    sb.append(doc);
    std::string_view serial = sb.str();
    bytes += serial.size();
    benchmark::DoNotOptimize(serial);
  }
  // Gigabyte: https://en.wikipedia.org/wiki/Gigabyte
  state.counters["Gigabytes"] = benchmark::Counter(
      double(bytes), benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1000); // For GiB : kIs1024
  state.counters["docs"] = Counter(double(state.iterations()), benchmark::Counter::kIsRate);
 }
 BENCHMARK(serialize_twitter_string_builder)->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
    return *(std::max_element(std::begin(v), std::end(v)));
  })->DisplayAggregatesOnly(true);
 static void numbers_scan(State& state) {
  // Prints the number of results in twitter.json
  dom::parser parser;
--- a/include/simdjson.h
+++ b/include/simdjson.h
@ -43,6 +43,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
 // Public API
 #include "simdjson/simdjson_version.h"
 #include "simdjson/error.h"
 #include "simdjson/minify.h"
 #include "simdjson/padded_string.h"
 #include "simdjson/implementation.h"
 #include "simdjson/dom/array.h"
@ -51,6 +52,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
 #include "simdjson/dom/element.h"
 #include "simdjson/dom/object.h"
 #include "simdjson/dom/parser.h"
 #include "simdjson/dom/serialization.h"
 // Deprecated API
 #include "simdjson/dom/jsonparser.h"
@ -68,6 +70,7 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS
 #include "simdjson/dom/parsedjson_iterator-inl.h"
 #include "simdjson/dom/parser-inl.h"
 #include "simdjson/internal/tape_ref-inl.h"
 #include "simdjson/dom/serialization-inl.h"
 SIMDJSON_POP_DISABLE_WARNINGS
--- a/include/simdjson/common_defs.h
+++ b/include/simdjson/common_defs.h
@ -6,6 +6,15 @@
 namespace simdjson {
 namespace internal {
 /**
 * @private
 * Our own implementation of the C++17 to_chars function.
 * Defined in src/to_chars
 *
 * @param first Start of the caller-provided output buffer.
 * @param last Nominally the end of the output buffer.
 *        NOTE(review): the DOM serializer passes nullptr here and relies on
 *        the implementation not reading this argument -- confirm before
 *        depending on any bounds checking.
 * @param value The floating-point number to format.
 * @return The DOM serializer treats the returned pointer as the end (one
 *         past the last character) of the text written starting at first.
 */
 char *to_chars(char *first, const char *last, double value);
 }
 #ifndef SIMDJSON_EXCEPTIONS
 #if __cpp_exceptions
 #define SIMDJSON_EXCEPTIONS 1
--- a/include/simdjson/dom/array-inl.h
+++ b/include/simdjson/dom/array-inl.h
@ -144,39 +144,9 @@ inline bool array::iterator::operator>=(const array::iterator& other) const noex
 inline bool array::iterator::operator>(const array::iterator& other) const noexcept {
  return tape.json_index > other.tape.json_index;
 }
 inline std::ostream& operator<<(std::ostream& out, const array &value) {
  return out << minify<array>(value);
 }
 } // namespace dom
 template<>
 inline std::ostream& minifier<dom::array>::print(std::ostream& out) {
  out << '[';
  auto iter = value.begin();
  auto end = value.end();
  if (iter != end) {
    out << minify<dom::element>(*iter);
    for (++iter; iter != end; ++iter) {
      out << "," << minify<dom::element>(*iter);
    }
  }
  return out << ']';
 }
 #if SIMDJSON_EXCEPTIONS
 template<>
 inline std::ostream& minifier<simdjson_result<dom::array>>::print(std::ostream& out) {
  if (value.error()) { throw simdjson_error(value.error()); }
  return out << minify<dom::array>(value.first);
 }
 inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false) {
  return out << minify<simdjson_result<dom::array>>(value);
 }
 #endif
 } // namespace simdjson
--- a/include/simdjson/dom/array.h
+++ b/include/simdjson/dom/array.h
@ -4,10 +4,13 @@
 #include "simdjson/common_defs.h"
 #include "simdjson/error.h"
 #include "simdjson/internal/tape_ref.h"
 #include "simdjson/minify.h"
 #include <ostream>
 namespace simdjson {
 namespace internal {
 template<typename T>
 class string_builder;
 }
 namespace dom {
 class document;
@ -125,19 +128,9 @@ private:
  friend class element;
  friend struct simdjson_result<element>;
  template<typename T>
-  friend class simdjson::minifier;
+  friend class simdjson::internal::string_builder;
 };
 /**
 * Print JSON to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The value to print.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 inline std::ostream& operator<<(std::ostream& out, const array &value);
 } // namespace dom
@ -159,20 +152,7 @@ public:
 #endif // SIMDJSON_EXCEPTIONS
 };
-#if SIMDJSON_EXCEPTIONS
+
 /**
 * Print JSON to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The value to print.
 * @throw simdjson_error if the result being printed has an error. If there is an error with the
 *        underlying output stream, that error will be propagated (simdjson_error will not be
 *        thrown).
 */
 inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::array> &value) noexcept(false);
 #endif
 } // namespace simdjson
--- a/include/simdjson/dom/document.h
+++ b/include/simdjson/dom/document.h
@ -2,7 +2,6 @@
 #define SIMDJSON_DOM_DOCUMENT_H
 #include "simdjson/common_defs.h"
 #include "simdjson/minify.h"
 #include <memory>
 #include <ostream>
@ -67,8 +66,6 @@ public:
 private:
  inline error_code allocate(size_t len) noexcept;
  template<typename T>
  friend class simdjson::minifier;
  friend class parser;
 }; // class document
--- a/include/simdjson/dom/element-inl.h
+++ b/include/simdjson/dom/element-inl.h
@ -387,9 +387,6 @@ inline bool element::dump_raw_tape(std::ostream &out) const noexcept {
  return tape.doc->dump_raw_tape(out);
 }
 inline std::ostream& operator<<(std::ostream& out, const element &value) {
  return out << minify<element>(value);
 }
 inline std::ostream& operator<<(std::ostream& out, element_type type) {
  switch (type) {
@ -416,143 +413,6 @@ inline std::ostream& operator<<(std::ostream& out, element_type type) {
 } // namespace dom
 template<>
 inline std::ostream& minifier<dom::element>::print(std::ostream& out) {
  using tape_type=internal::tape_type;
  size_t depth = 0;
  constexpr size_t MAX_DEPTH = 16;
  bool is_object[MAX_DEPTH];
  is_object[0] = false;
  bool after_value = false;
  internal::tape_ref iter(value.tape);
  do {
    // print commas after each value
    if (after_value) {
      out << ",";
    }
    // If we are in an object, print the next key and :, and skip to the next value.
    if (is_object[depth]) {
      out << '"' << internal::escape_json_string(iter.get_string_view()) << "\":";
      iter.json_index++;
    }
    switch (iter.tape_ref_type()) {
    // Arrays
    case tape_type::START_ARRAY: {
      // If we're too deep, we need to recurse to go deeper.
      depth++;
      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
        out << minify<dom::array>(dom::array(iter));
        iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
        depth--;
        break;
      }
      // Output start [
      out << '[';
      iter.json_index++;
      // Handle empty [] (we don't want to come back around and print commas)
      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
        out << ']';
        depth--;
        break;
      }
      is_object[depth] = false;
      after_value = false;
      continue;
    }
    // Objects
    case tape_type::START_OBJECT: {
      // If we're too deep, we need to recurse to go deeper.
      depth++;
      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
        out << minify<dom::object>(dom::object(iter));
        iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
        depth--;
        break;
      }
      // Output start {
      out << '{';
      iter.json_index++;
      // Handle empty {} (we don't want to come back around and print commas)
      if (iter.tape_ref_type() == tape_type::END_OBJECT) {
        out << '}';
        depth--;
        break;
      }
      is_object[depth] = true;
      after_value = false;
      continue;
    }
    // Scalars
    case tape_type::STRING:
      out << '"' << internal::escape_json_string(iter.get_string_view()) << '"';
      break;
    case tape_type::INT64:
      out << iter.next_tape_value<int64_t>();
      iter.json_index++; // numbers take up 2 spots, so we need to increment extra
      break;
    case tape_type::UINT64:
      out << iter.next_tape_value<uint64_t>();
      iter.json_index++; // numbers take up 2 spots, so we need to increment extra
      break;
    case tape_type::DOUBLE:
      out << iter.next_tape_value<double>();
      iter.json_index++; // numbers take up 2 spots, so we need to increment extra
      break;
    case tape_type::TRUE_VALUE:
      out << "true";
      break;
    case tape_type::FALSE_VALUE:
      out << "false";
      break;
    case tape_type::NULL_VALUE:
      out << "null";
      break;
    // These are impossible
    case tape_type::END_ARRAY:
    case tape_type::END_OBJECT:
    case tape_type::ROOT:
      out << "unexpected content!!!"; // abort() usage is forbidden in the library
    }
    iter.json_index++;
    after_value = true;
    // Handle multiple ends in a row
    while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || iter.tape_ref_type() == tape_type::END_OBJECT)) {
      out << char(iter.tape_ref_type());
      depth--;
      iter.json_index++;
    }
    // Stop when we're at depth 0
  } while (depth != 0);
  return out;
 }
 #if SIMDJSON_EXCEPTIONS
 template<>
 simdjson_really_inline std::ostream& minifier<simdjson_result<dom::element>>::print(std::ostream& out) {
  if (value.error()) { throw simdjson_error(value.error()); }
  return out << minify<dom::element>(value.first);
 }
 simdjson_really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false) {
  return out << minify<simdjson_result<dom::element>>(value);
 }
 #endif
 } // namespace simdjson
 #endif // SIMDJSON_INLINE_ELEMENT_H
--- a/include/simdjson/dom/element.h
+++ b/include/simdjson/dom/element.h
@ -4,12 +4,14 @@
 #include "simdjson/common_defs.h"
 #include "simdjson/error.h"
 #include "simdjson/internal/tape_ref.h"
 #include "simdjson/minify.h"
 #include <ostream>
 namespace simdjson {
 namespace internal {
 template<typename T>
 class string_builder;
 }
 namespace dom {
 class array;
 class document;
 class object;
@ -473,29 +475,10 @@ private:
  friend class array;
  friend struct simdjson_result<element>;
  template<typename T>
-  friend class simdjson::minifier;
+  friend class simdjson::internal::string_builder;
 };
 /**
 * Print JSON to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The value to print.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 inline std::ostream& operator<<(std::ostream& out, const element &value);
 /**
 * Print element type to an output stream.
 *
 * @param out The output stream.
 * @param value The value to print.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 inline std::ostream& operator<<(std::ostream& out, element_type type);
 } // namespace dom
 /** The result of a JSON navigation that may fail. */
@ -557,20 +540,6 @@ public:
 #endif // SIMDJSON_EXCEPTIONS
 };
 #if SIMDJSON_EXCEPTIONS
 /**
 * Print JSON to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The value to print.
 * @throw simdjson_error if the result being printed has an error. If there is an error with the
 *        underlying output stream, that error will be propagated (simdjson_error will not be
 *        thrown).
 */
 simdjson_really_inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::element> &value) noexcept(false);
 #endif
 } // namespace simdjson
--- a/include/simdjson/dom/object-inl.h
+++ b/include/simdjson/dom/object-inl.h
@ -236,47 +236,8 @@ inline bool object::iterator::key_equals_case_insensitive(std::string_view o) co
 inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept :
  key(_key), value(_value) {}
 inline std::ostream& operator<<(std::ostream& out, const object &value) {
  return out << minify<object>(value);
 }
 inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value) {
  return out << minify<key_value_pair>(value);
 }
 } // namespace dom
 template<>
 inline std::ostream& minifier<dom::object>::print(std::ostream& out) {
  out << '{';
  auto pair = value.begin();
  auto end = value.end();
  if (pair != end) {
    out << minify<dom::key_value_pair>(*pair);
    for (++pair; pair != end; ++pair) {
      out << "," << minify<dom::key_value_pair>(*pair);
    }
  }
  return out << '}';
 }
 template<>
 inline std::ostream& minifier<dom::key_value_pair>::print(std::ostream& out) {
  return out << '"' << internal::escape_json_string(value.key) << "\":" << value.value;
 }
 #if SIMDJSON_EXCEPTIONS
 template<>
 inline std::ostream& minifier<simdjson_result<dom::object>>::print(std::ostream& out) {
  if (value.error()) { throw simdjson_error(value.error()); }
  return out << minify<dom::object>(value.first);
 }
 inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false) {
  return out << minify<simdjson_result<dom::object>>(value);
 }
 #endif // SIMDJSON_EXCEPTIONS
 } // namespace simdjson
 #if defined(__cpp_lib_ranges)
--- a/include/simdjson/dom/object.h
+++ b/include/simdjson/dom/object.h
@ -4,10 +4,12 @@
 #include "simdjson/common_defs.h"
 #include "simdjson/error.h"
 #include "simdjson/internal/tape_ref.h"
 #include "simdjson/minify.h"
 #include <ostream>
 namespace simdjson {
 namespace internal {
 template<typename T>
 class string_builder;
 }
 namespace dom {
 class document;
@ -211,7 +213,7 @@ private:
  friend class element;
  friend struct simdjson_result<element>;
  template<typename T>
-  friend class simdjson::minifier;
+  friend class simdjson::internal::string_builder;
 };
 /**
@ -229,27 +231,6 @@ private:
  friend class object;
 };
 /**
 * Print JSON to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The value to print.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 inline std::ostream& operator<<(std::ostream& out, const object &value);
 /**
 * Print JSON to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The value to print.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 inline std::ostream& operator<<(std::ostream& out, const key_value_pair &value);
 } // namespace dom
 /** The result of a JSON conversion that may fail. */
@ -273,21 +254,6 @@ public:
 #endif // SIMDJSON_EXCEPTIONS
 };
 #if SIMDJSON_EXCEPTIONS
 /**
 * Print JSON to an output stream.
 *
 * By default, the value will be printed minified.
 *
 * @param out The output stream.
 * @param value The value to print.
 * @throw simdjson_error if the result being printed has an error. If there is an error with the
 *        underlying output stream, that error will be propagated (simdjson_error will not be
 *        thrown).
 */
 inline std::ostream& operator<<(std::ostream& out, const simdjson_result<dom::object> &value) noexcept(false);
 #endif // SIMDJSON_EXCEPTIONS
 } // namespace simdjson
 #if defined(__cpp_lib_ranges)
--- a/include/simdjson/dom/parser-inl.h
+++ b/include/simdjson/dom/parser-inl.h
@ -25,11 +25,7 @@ simdjson_really_inline parser &parser::operator=(parser &&other) noexcept = defa
 // Reports the parser's `valid` flag.
 inline bool parser::is_valid() const noexcept { return valid; }
 // Returns the stored `error` member as a plain int.
 inline int parser::get_error_code() const noexcept { return error; }
 // Returns the text produced by error_message() for the stored `error`.
 inline std::string parser::get_error_message() const noexcept { return error_message(error); }
-inline bool parser::print_json(std::ostream &os) const noexcept {
+
  if (!valid) { return false; }
  os << doc.root();
  return true;
 }
 // Dumps the raw tape of the held document to `os`. When the parser is not
 // in a valid state nothing is written and false is returned; otherwise the
 // result of the document's own dump_raw_tape is forwarded.
 inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
  if (!valid) { return false; }
  return doc.dump_raw_tape(os);
 }
--- a/include/simdjson/dom/parser.h
+++ b/include/simdjson/dom/parser.h
@ -6,7 +6,6 @@
 #include "simdjson/error.h"
 #include "simdjson/internal/dom_parser_implementation.h"
 #include "simdjson/internal/tape_ref.h"
 #include "simdjson/minify.h"
 #include "simdjson/padded_string.h"
 #include "simdjson/portability.h"
 #include <memory>
--- a/include/simdjson/dom/serialization-inl.h
+++ b/include/simdjson/dom/serialization-inl.h
@ -0,0 +1,421 @@
 #ifndef SIMDJSON_SERIALIZATION_INL_H
 #define SIMDJSON_SERIALIZATION_INL_H
 #include "simdjson/dom/serialization.h"
 #include <cinttypes>
 #include <type_traits>
 namespace simdjson {
 namespace dom {
 inline bool parser::print_json(std::ostream &os) const noexcept {
  if (!valid) { return false; }
  simdjson::internal::string_builder<> sb;
  sb.append(doc.root());
  std::string_view answer = sb.str();
  os << answer;
  return true;
 }
 }
 /***
 * Number utility functions
 **/
 namespace {
 /**@private
 * Escape sequence like \b or \u0001
 * We expect that most compilers will use 8 bytes for this data structure.
 * The text is stored inline next to its length, so an entry can be copied
 * out with a (pointer, length) pair and does not rely on a terminating NUL.
 **/
 struct escape_sequence {
    uint8_t length;        // number of meaningful characters in `string`
    const char string[7]; // technically, we only ever need 6 characters, we pad to 8
 };
 /**@private
 * This converts a signed integer into a character sequence.
 * The caller is responsible for providing enough memory (at least
 * 20 characters: a minus sign followed by up to 19 digits.)
 * No terminating NUL character is written.
 * Though various runtime libraries provide itoa functions,
 * it is not part of the C++ standard. The C++17 standard
 * adds the to_chars functions which would do as well, but
 * we want to support C++11.
 *
 * @param output start of the caller-provided buffer
 * @param value the integer to format (the whole int64_t range is supported)
 * @return pointer one past the last character written
 */
 char *fast_itoa(char *output, int64_t value) noexcept {
  // This is a standard implementation of itoa.
  // We work on the magnitude as an unsigned integer: negating the smallest
  // int64_t as a signed value overflows (undefined behavior), whereas
  // unsigned arithmetic wraps around and yields the correct magnitude.
  uint64_t magnitude = uint64_t(value);
  if(value < 0) {
    *output++ = '-';
    magnitude = uint64_t(0) - magnitude;
  }
  // We first write the digits in reverse order and then reverse.
  char *write_pointer = output;
  do {
    *write_pointer++ = char('0' + (magnitude % 10));
    magnitude /= 10;
  } while (magnitude != 0);
  // then we reverse the digits (the sign, if any, is left in place)
  char *const answer = write_pointer;
  char *second_write_pointer = output;
  write_pointer -= 1;
  while (second_write_pointer < write_pointer) {
    char c1 = *write_pointer;
    char c2 = *second_write_pointer;
    *second_write_pointer = c1;
    *write_pointer = c2;
    write_pointer--;
    second_write_pointer++;
  }
  return answer;
 }
 /**@private
 * Writes the decimal digits of an unsigned 64-bit integer into a
 * caller-provided buffer. No terminating NUL character is written.
 * The buffer must have room for at least 20 characters, since the
 * largest value, 18446744073709551615, has 20 digits.
 * Runtime libraries often ship an itoa, but it is not standard C++,
 * and the C++17 to_chars functions are not available to us because
 * we want to support C++11.
 *
 * @param output start of the caller-provided buffer
 * @param value the integer to format
 * @return pointer one past the last character written
 */
 char *fast_itoa(char *output, uint64_t value) noexcept {
  // Emit the digits starting from the least significant one; at least one
  // digit is always produced, so zero is written as "0".
  char *cursor = output;
  for (;;) {
    *cursor++ = char('0' + (value % 10));
    value /= 10;
    if (value == 0) { break; }
  }
  char *const end_of_digits = cursor;
  // The digits are currently backwards: swap them pairwise from both ends.
  for (char *left = output, *right = cursor - 1; left < right; ++left, --right) {
    const char swapped = *left;
    *left = *right;
    *right = swapped;
  }
  return end_of_digits;
 }
 } // anonymous namespace
 namespace internal {
 /***
 * Minifier/formatter code.
 **/
 // Appends the decimal text of an unsigned integer to the output buffer.
 simdjson_really_inline void mini_formatter::number(uint64_t x) {
  // Format into a local scratch area, then copy the characters over.
  char scratch[24];
  char *const scratch_end = fast_itoa(scratch, x);
  buffer.insert(buffer.end(), scratch, scratch_end);
 }
 // Appends the decimal text of a signed integer to the output buffer.
 simdjson_really_inline void mini_formatter::number(int64_t x) {
  // Format into a local scratch area, then copy the characters over.
  char scratch[24];
  char *const scratch_end = fast_itoa(scratch, x);
  buffer.insert(buffer.end(), scratch, scratch_end);
 }
 // Appends the text of a floating-point number to the output buffer.
 simdjson_really_inline void mini_formatter::number(double x) {
  char scratch[24];
  // The second argument of to_chars is given as nullptr: this is
  // currently safe because our implementation never looks at it.
  char *const scratch_end = internal::to_chars(scratch, nullptr, x);
  buffer.insert(buffer.end(), scratch, scratch_end);
 }
 // Structural characters: each of these appends exactly one character to the
 // output through one_char(); the minifier adds no whitespace around them.
 simdjson_really_inline void mini_formatter::start_array() { one_char('['); }
 simdjson_really_inline void mini_formatter::end_array() { one_char(']'); }
 simdjson_really_inline void mini_formatter::start_object() { one_char('{'); }
 simdjson_really_inline void mini_formatter::end_object() { one_char('}'); }
 simdjson_really_inline void mini_formatter::comma() { one_char(','); }
 // Appends the JSON literal true (4 characters, no terminating NUL).
 simdjson_really_inline void mini_formatter::true_atom() {
  const char *const literal = "true";
  buffer.insert(buffer.end(), literal, literal + 4);
 }
 // Appends the JSON literal false (5 characters, no terminating NUL).
 simdjson_really_inline void mini_formatter::false_atom() {
  const char *const literal = "false";
  buffer.insert(buffer.end(), literal, literal + 5);
 }
 // Appends the JSON literal null (4 characters, no terminating NUL).
 simdjson_really_inline void mini_formatter::null_atom() {
  const char *const literal = "null";
  buffer.insert(buffer.end(), literal, literal + 4);
 }
 // Appends a single character to the output buffer, as is (no escaping).
 simdjson_really_inline void mini_formatter::one_char(char c) { buffer.push_back(c); }
 // Appends an object key: the key is written as a quoted, escaped JSON string
 // (see string()) immediately followed by a colon.
 simdjson_really_inline void mini_formatter::key(std::string_view unescaped) {
  string(unescaped);
  one_char(':');
 }
 // Appends `unescaped` to the output buffer as a JSON string: surrounded by
 // double quotes, with '"', '\\' and the characters 0x00-0x1F escaped.
 // Every other byte is copied through unchanged.
 //
 // The work is split in two phases: a scan that finds the longest prefix
 // needing no escaping (copied with a single insert), followed by a
 // character-by-character loop over the remainder of the input.
 simdjson_really_inline void mini_formatter::string(std::string_view unescaped) {
  one_char('\"');
  size_t i = 0;
  // Fast path for the case where we have no control character, no ", and no backslash.
  // This should include most keys.
  // Lookup table indexed by byte value: 1 when the byte must be escaped
  // (0x00-0x1F, '"' and '\\'), 0 otherwise.
  constexpr static bool needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  // First pass: examine the input in groups of 8 bytes and stop at the first
  // group that contains at least one byte needing an escape.
  for(;i + 8 <= unescaped.length(); i += 8) { 
    // Poor man's vectorization. This could get much faster if we used SIMD.
    if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] 
      | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])]
      | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] 
      | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])]
      ) { break; }
  }
  // Second pass: advance byte by byte (inside the offending group, or over
  // the tail shorter than 8 bytes) up to the first byte needing an escape.
  for(;i < unescaped.length(); i++) { 
    if(needs_escaping[uint8_t(unescaped[i])]) { break; }
  }
  // The following is also possible and omits a 256-byte table, but it is slower:
  // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) 
  //      && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
  // At least for long strings, the following should be fast. We could
  // do better by integrating the checks and the insertion.
  // Copy the clean prefix [0, i) in one go.
  buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i);
  // We caught a character requiring an escape if we enter this loop (slow).
  // Note that we do not restart from the beginning, but rather we continue
  // from the point where we encountered something that requires escaping.
  for (; i < unescaped.length(); i++) {
    switch (unescaped[i]) {
    case '\"':
      {
        const char * s = "\\\"";
        buffer.insert(buffer.end(), s, s + 2);
      }
      break;
    case '\\':
      {
        const char * s = "\\\\";
        buffer.insert(buffer.end(), s, s + 2);
      }
      break;
    default:
      if (uint8_t(unescaped[i]) <= 0x1F) {
        // If packed, this uses 8 * 32 bytes.
        // Note that we expect most compilers to embed this table in the data
        // section.
        // Entry k holds the escape text for byte value k: the short forms
        // \b \t \n \f \r where the table has them, \u00XX otherwise.
        constexpr static escape_sequence escaped[32] = {
          {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"},
          {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"},
          {2, "\\b"},     {2, "\\t"},     {2, "\\n"},     {6, "\\u000b"},
          {2, "\\f"},     {2, "\\r"},     {6, "\\u000e"}, {6, "\\u000f"},
          {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"},
          {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"},
          {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
          {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
        auto u = escaped[uint8_t(unescaped[i])];
        buffer.insert(buffer.end(), u.string, u.string + u.length);
      } else {
        // Ordinary character located after the first escaped one.
        one_char(unescaped[i]);
      }
    } // switch
  }   // for
  one_char('\"');
 }
 // Empties the output buffer so that the formatter can be reused.
 inline void mini_formatter::clear() {
  buffer.clear();
 }
 // Returns a view over the characters accumulated so far. The view points
 // into the formatter's own buffer: it is not a copy.
 simdjson_really_inline std::string_view mini_formatter::str() const {
  return std::string_view(buffer.data(), buffer.size());
 }
 /***
 * String building code.
 **/
 // Appends the JSON text of `value` (a scalar, an array or an object) by
 // walking the tape iteratively and emitting every token through `format`.
 //
 // State kept while walking:
 //  - depth: number of containers currently open in this loop,
 //  - is_object[d]: whether the container open at depth d is an object, in
 //    which case a key is emitted in front of each value,
 //  - after_value: whether a comma is due before the next item.
 // Nesting is tracked in a fixed array of MAX_DEPTH entries; a container that
 // would reach that depth is handed over to the array/object overloads
 // instead, and the walk resumes after its closing brace.
 template <class serializer>
 inline void string_builder<serializer>::append(simdjson::dom::element value) {
  // using tape_type = simdjson::internal::tape_type;
  size_t depth = 0;
  constexpr size_t MAX_DEPTH = 16;
  bool is_object[MAX_DEPTH];
  is_object[0] = false;
  bool after_value = false;
  internal::tape_ref iter(value.tape);
  do {
    // print commas after each value
    if (after_value) {
      format.comma();
    }
    // If we are in an object, print the next key and :, and skip to the next
    // value.
    if (is_object[depth]) {
      format.key(iter.get_string_view());
      iter.json_index++;
    }
    switch (iter.tape_ref_type()) {
    // Arrays
    case tape_type::START_ARRAY: {
      // If we're too deep, we need to recurse to go deeper.
      depth++;
      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
        append(simdjson::dom::array(iter));
        iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
        depth--;
        break;
      }
      // Output start [
      format.start_array();
      iter.json_index++;
      // Handle empty [] (we don't want to come back around and print commas)
      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
        format.end_array();
        depth--;
        break;
      }
      is_object[depth] = false;
      after_value = false;
      // Go straight to the first item: `continue` skips the shared
      // post-value bookkeeping below the switch.
      continue;
    }
    // Objects
    case tape_type::START_OBJECT: {
      // If we're too deep, we need to recurse to go deeper.
      depth++;
      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
        append(simdjson::dom::object(iter));
        iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
        depth--;
        break;
      }
      // Output start {
      format.start_object();
      iter.json_index++;
      // Handle empty {} (we don't want to come back around and print commas)
      if (iter.tape_ref_type() == tape_type::END_OBJECT) {
        format.end_object();
        depth--;
        break;
      }
      is_object[depth] = true;
      after_value = false;
      // Go straight to the first key: `continue` skips the shared
      // post-value bookkeeping below the switch.
      continue;
    }
    // Scalars
    case tape_type::STRING:
      format.string(iter.get_string_view());
      break;
    case tape_type::INT64:
      format.number(iter.next_tape_value<int64_t>());
      iter.json_index++; // numbers take up 2 spots, so we need to increment
                         // extra
      break;
    case tape_type::UINT64:
      format.number(iter.next_tape_value<uint64_t>());
      iter.json_index++; // numbers take up 2 spots, so we need to increment
                         // extra
      break;
    case tape_type::DOUBLE:
      format.number(iter.next_tape_value<double>());
      iter.json_index++; // numbers take up 2 spots, so we need to increment
                         // extra
      break;
    case tape_type::TRUE_VALUE:
      format.true_atom();
      break;
    case tape_type::FALSE_VALUE:
      format.false_atom();
      break;
    case tape_type::NULL_VALUE:
      format.null_atom();
      break;
    // These are impossible
    case tape_type::END_ARRAY:
    case tape_type::END_OBJECT:
    case tape_type::ROOT:
      SIMDJSON_UNREACHABLE();
    }
    // A complete value has just been written: step past its last tape slot.
    iter.json_index++;
    after_value = true;
    // Handle multiple ends in a row
    while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
                          iter.tape_ref_type() == tape_type::END_OBJECT)) {
      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
        format.end_array();
      } else {
        format.end_object();
      }
      depth--;
      iter.json_index++;
    }
    // Stop when we're at depth 0
  } while (depth != 0);
 }
 // Appends a whole object: an opening brace, its key/value pairs separated
 // by commas (none before the first, none after the last), a closing brace.
 template <class serializer>
 inline void string_builder<serializer>::append(simdjson::dom::object value) {
  format.start_object();
  bool is_first = true;
  auto last = value.end();
  for (auto field = value.begin(); field != last; ++field) {
    // A separator goes in front of every pair except the first one.
    if (!is_first) { format.comma(); }
    is_first = false;
    append(*field);
  }
  format.end_object();
 }
 // Appends a whole array: an opening bracket, its elements separated by
 // commas (none before the first, none after the last), a closing bracket.
 template <class serializer>
 inline void string_builder<serializer>::append(simdjson::dom::array value) {
  format.start_array();
  bool is_first = true;
  auto last = value.end();
  for (auto item = value.begin(); item != last; ++item) {
    // A separator goes in front of every element except the first one.
    if (!is_first) { format.comma(); }
    is_first = false;
    append(*item);
  }
  format.end_array();
 }
 // Appends one object member: the key (as emitted by the formatter's key())
 // followed by the serialized value.
 template <class serializer>
 simdjson_really_inline void string_builder<serializer>::append(simdjson::dom::key_value_pair kv) {
  format.key(kv.key);
  append(kv.value);
 }
 // Discards everything appended so far by clearing the underlying formatter.
 template <class serializer>
 simdjson_really_inline void string_builder<serializer>::clear() {
  format.clear();
 }
 // Returns the formatter's view of the text built so far.
 template <class serializer>
 simdjson_really_inline std::string_view string_builder<serializer>::str() const {
  return format.str();
 }
 } // namespace internal
 } // namespace simdjson
 #endif
--- a/include/simdjson/dom/serialization.h
+++ b/include/simdjson/dom/serialization.h
@ -0,0 +1,219 @@
 #ifndef SIMDJSON_SERIALIZATION_H
 #define SIMDJSON_SERIALIZATION_H
 #include "simdjson/common_defs.h"
 #include "simdjson/dom/document.h"
 #include "simdjson/error.h"
 #include "simdjson/internal/dom_parser_implementation.h"
 #include "simdjson/internal/tape_ref.h"
 #include "simdjson/padded_string.h"
 #include "simdjson/portability.h"
 #include <vector>
 namespace simdjson {
 /**
 * The string_builder template and mini_formatter class
 * are not part of our public API and are subject to change
 * at any time!
 */
 namespace internal {
 class mini_formatter;
 /**
 * @private The string_builder template allows us to construct
 * a string from a document element. It is parametrized
 * by a "formatter" which handles the details. Thus
 * the string_builder template could support both minification
 * and prettification, and various other tradeoffs.
 *
 * The builder holds a single formatter instance; clear() and str()
 * are declared here and operate on the content accumulated by the
 * append() overloads.
 */
 template <class formatter = mini_formatter> 
 class string_builder {
 public:
  /** Construct an initially empty builder, would print the empty string **/
  string_builder() = default;
  /** Append an element to the builder (to be printed) **/
  inline void append(simdjson::dom::element value);
  /** Append an array to the builder (to be printed) **/
  inline void append(simdjson::dom::array value);
  /** Append an object to the builder (to be printed) **/
  inline void append(simdjson::dom::object value);
  /** Reset the builder (so that it would print the empty string) **/
  simdjson_really_inline void clear();
  /** 
   * Get access to the string. The string_view is owned by the builder
   * and it is invalid to use it after the string_builder has been 
   * destroyed.
   * However you can make a copy of the string_view on memory that you
   * own. 
   */
  simdjson_really_inline std::string_view str() const;
  /** Append a key_value_pair to the builder (to be printed) **/
  simdjson_really_inline void append(simdjson::dom::key_value_pair value);
 private:
  /** The formatter that receives every formatting call made by the builder. **/
  formatter format{};
 };
 /**
 * @private This is the class that we expect to use with the string_builder
 * template. It tries to produce a compact version of the JSON element
 * as quickly as possible.
 */
 class mini_formatter {
 public:
  mini_formatter() = default;
  /** Add a comma **/
  simdjson_really_inline void comma();
  /** Start an array, prints [ **/
  simdjson_really_inline void start_array();
  /** End an array, prints ] **/
  simdjson_really_inline void end_array();
  /** Start an object, prints { **/
  simdjson_really_inline void start_object();
  /** End an object, prints } **/
  simdjson_really_inline void end_object();
  /** Prints a true **/
  simdjson_really_inline void true_atom();
  /** Prints a false **/
  simdjson_really_inline void false_atom();
  /** Prints a null **/
  simdjson_really_inline void null_atom();
  /** Prints a signed 64-bit integer **/
  simdjson_really_inline void number(int64_t x);
  /** Prints an unsigned 64-bit integer **/
  simdjson_really_inline void number(uint64_t x);
  /** Prints a floating-point number **/
  simdjson_really_inline void number(double x);
  /** Prints a key (string + colon) **/
  simdjson_really_inline void key(std::string_view unescaped);
  /** Prints a string. The string is escaped as needed. **/
  simdjson_really_inline void string(std::string_view unescaped);
  /** Clears out the content. **/
  simdjson_really_inline void clear();
  /** 
   * Get access to the buffer, it is owned by the instance, but
   * the user can make a copy. 
   **/
  simdjson_really_inline std::string_view str() const;
 private:
  // implementation details (subject to change)
  /** Prints one character **/
  simdjson_really_inline void one_char(char c);
  /** Backing buffer **/
  std::vector<char> buffer{}; // not ideal!
 };
 } // internal
 namespace dom {
 /**
 * Print JSON to an output stream.
 *
 * @param out The output stream.
 * @param value The element.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { 
    simdjson::internal::string_builder<> sb;
    sb.append(value);
    return (out << sb.str());
 }
 #if SIMDJSON_EXCEPTIONS
 inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::element> x) { 
    if (x.error()) { throw simdjson::simdjson_error(x.error()); }
    return (out << x.value());
 }
 #endif
 /**
 * Print JSON to an output stream.
 *
 * @param out The output stream.
 * @param value The array.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value)  { 
    simdjson::internal::string_builder<> sb;
    sb.append(value);
    return (out << sb.str());
 }
 #if SIMDJSON_EXCEPTIONS
 inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::array> x) { 
    if (x.error()) { throw simdjson::simdjson_error(x.error()); }
    return (out << x.value());
 }
 #endif
 /**
 * Print JSON to an output stream.
 *
 * @param out The output stream.
 * @param value The objet.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value)   { 
    simdjson::internal::string_builder<> sb;
    sb.append(value);
    return (out << sb.str());
 }
 #if SIMDJSON_EXCEPTIONS
 inline std::ostream& operator<<(std::ostream& out,  simdjson::simdjson_result<simdjson::dom::object> x) { 
    if (x.error()) { throw  simdjson::simdjson_error(x.error()); }
    return (out << x.value());
 }
 #endif 
 } // namespace dom
 /**
 * Converts JSON to a string.
 *
 *   dom::parser parser;
 *   element doc = parser.parse("   [ 1 , 2 , 3 ] "_padded);
 *   cout << to_string(doc) << endl; // prints [1,2,3]
 *
 */
 template <class T> 
 std::string to_string(T x)   {
    // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/
    // Currently minify and to_string are identical but in the future, they may 
    // differ.
    simdjson::internal::string_builder<> sb;
    sb.append(x);
    std::string_view answer = sb.str();
    return std::string(answer.data(), answer.size());
 }
 #if SIMDJSON_EXCEPTIONS
 template <class T>
 std::string to_string(simdjson_result<T> x) {
    // A failed result is reported as an exception; otherwise convert the held value.
    const auto failure = x.error();
    if (failure) { throw simdjson_error(failure); }
    return to_string(x.value());
 }
 #endif 
 /**
 * Minifies a JSON element or document, printing the smallest possible valid JSON.
 *
 *   dom::parser parser;
 *   element doc = parser.parse("   [ 1 , 2 , 3 ] "_padded);
 *   cout << minify(doc) << endl; // prints [1,2,3]
 *
 * This currently forwards to to_string().
 */
 template <class T>
 std::string minify(T x) {
  std::string minified = to_string(x);
  return minified;
 }
 #if SIMDJSON_EXCEPTIONS
 template <class T>
 std::string minify(simdjson_result<T> x) {
    // A failed result is reported as an exception; otherwise convert the held value.
    const auto failure = x.error();
    if (failure) { throw simdjson_error(failure); }
    return to_string(x.value());
 }
 #endif 
 } // namespace simdjson
 #endif
--- a/include/simdjson/minify.h
+++ b/include/simdjson/minify.h
@ -27,50 +27,6 @@ namespace simdjson {
 */
 SIMDJSON_WARN_UNUSED error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept;
 /**
 * Minifies a JSON element or document, printing the smallest possible valid JSON.
 *
 *   dom::parser parser;
 *   element doc = parser.parse("   [ 1 , 2 , 3 ] "_padded);
 *   cout << minify(doc) << endl; // prints [1,2,3]
 *
 * The minifier keeps a reference to the value it was constructed from
 * (it does not copy it), so that value must outlive the minifier.
 */
 template<typename T>
 class minifier {
 public:
  /**
   * Create a new minifier.
   *
   * @param _value The document or element to minify.
   */
  inline minifier(const T &_value) noexcept : value{_value} {}
  /**
   * Minify JSON to a string.
   *
   * The text is produced by streaming this minifier into a std::stringstream.
   * NOTE(review): declared noexcept although it builds a stream and a
   * std::string -- confirm that terminating on allocation failure is intended.
   */
  inline operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); }
  /**
   * Minify JSON to an output stream.
   *
   * @param out The output stream.
   */
  inline std::ostream& print(std::ostream& out);
 private:
  /** Reference to the value to minify (not owned). */
  const T &value;
 };
 template<typename T>
 inline minifier<T> minify(const T &value) noexcept {
   // Only wraps the value; text is produced when the minifier is converted or printed.
   return minifier<T>(value);
 }
 /**
 * Minify JSON to an output stream.
 *
 * Forwards to minifier<T>::print().
 *
 * @param out The output stream.
 * @param formatter The minifier.
 * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
 */
 template<typename T>
 inline std::ostream& operator<<(std::ostream& out, minifier<T> formatter) {
   std::ostream& stream = formatter.print(out);
   return stream;
 }
 } // namespace simdjson
 #endif // SIMDJSON_MINIFY_H
--- a/src/simdjson.cpp
+++ b/src/simdjson.cpp
@ -2,7 +2,7 @@
 SIMDJSON_PUSH_DISABLE_WARNINGS
 SIMDJSON_DISABLE_UNDESIRED_WARNINGS
-
+#include "to_chars.cpp"
 #include "error.cpp"
 #include "implementation.cpp"
--- a/src/to_chars.cpp
+++ b/src/to_chars.cpp
@ -0,0 +1,946 @@
 #include <array>
 #include <cmath>
 #include <cstdint>
 #include <cstring>
 #include <limits>
 #include <type_traits>
 namespace simdjson {
 namespace internal {
 /*!
 implements the Grisu2 algorithm for binary to decimal floating-point
 conversion.
 Adapted from JSON for Modern C++
 This implementation is a slightly modified version of the reference
 implementation which may be obtained from
 http://florian.loitsch.com/publications (bench.tar.gz).
 The code is distributed under the MIT license, Copyright (c) 2009 Florian
 Loitsch. For a detailed description of the algorithm see: [1] Loitsch, "Printing
 Floating-Point Numbers Quickly and Accurately with Integers", Proceedings of the
 ACM SIGPLAN 2010 Conference on Programming Language Design and Implementation,
 PLDI 2010 [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and
 Accurately", Proceedings of the ACM SIGPLAN 1996 Conference on Programming
 Language Design and Implementation, PLDI 1996
 */
 namespace dtoa_impl {
 /*!
 @brief copies the object representation of source into a value of type Target

 @tparam Target the type to produce; must have the same size as Source
 @tparam Source the type of the input value
 @param source the value whose bytes are reinterpreted
 @return a Target holding the same bytes as source

 Both types must be trivially copyable: std::memcpy is only well-defined for
 such types, so this is enforced at compile time.
 */
 template <typename Target, typename Source>
 Target reinterpret_bits(const Source source) {
   static_assert(sizeof(Target) == sizeof(Source), "size mismatch");
   static_assert(std::is_trivially_copyable<Target>::value &&
                     std::is_trivially_copyable<Source>::value,
                 "reinterpret_bits requires trivially copyable types");
   Target target;
   std::memcpy(&target, &source, sizeof(Source));
   return target;
 }
 struct diyfp // f * 2^e
 {
   static constexpr int kPrecision = 64; // = q
   std::uint64_t f = 0;
   int e = 0;
   constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {}
   /*!
   @brief returns x - y
   @pre x.e == y.e and x.f >= y.f
   */
   static diyfp sub(const diyfp &x, const diyfp &y) noexcept {
     // Same exponent on both sides: only the significands are subtracted.
     const std::uint64_t difference = x.f - y.f;
     return diyfp(difference, x.e);
   }
   /*!
   @brief returns x * y
   @note The result is rounded. (Only the upper q bits are returned.)
   */
   static diyfp mul(const diyfp &x, const diyfp &y) noexcept {
     static_assert(kPrecision == 64, "internal error");
     // Computes:
     //  f = round((x.f * y.f) / 2^q)
     //  e = x.e + y.e + q
     //
     // The 64x64 -> 128 bit product is emulated with four 32x32 -> 64 bit
     // partial products. Writing x.f = x_lo + 2^32 x_hi and
     // y.f = y_lo + 2^32 y_hi:
     //
     //   x.f * y.f = lo_lo + 2^32 (lo_hi + hi_lo) + 2^64 hi_hi
     //
     // Only the upper 64 bits are needed, so the low 32 bits of lo_lo are
     // dropped and the "middle" column (bits 32..63 of the product) is summed
     // solely to obtain its carry into the upper half.
     constexpr std::uint64_t kLow32 = 0xFFFFFFFFu;
     const std::uint64_t x_lo = x.f & kLow32;
     const std::uint64_t x_hi = x.f >> 32u;
     const std::uint64_t y_lo = y.f & kLow32;
     const std::uint64_t y_hi = y.f >> 32u;
     const std::uint64_t lo_lo = x_lo * y_lo;
     const std::uint64_t lo_hi = x_lo * y_hi;
     const std::uint64_t hi_lo = x_hi * y_lo;
     const std::uint64_t hi_hi = x_hi * y_hi;
     // Three 32-bit quantities plus the rounding bit cannot overflow 64 bits.
     // Adding 2^31 here rounds the discarded lower half (ties up).
     const std::uint64_t middle = (lo_lo >> 32u) + (lo_hi & kLow32) +
                                  (hi_lo & kLow32) +
                                  (std::uint64_t{1} << (64u - 32u - 1u));
     const std::uint64_t upper =
         hi_hi + (hi_lo >> 32u) + (lo_hi >> 32u) + (middle >> 32u);
     return diyfp(upper, x.e + y.e + 64);
   }
   /*!
   @brief normalize x such that the significand is >= 2^(q-1)
   @pre x.f != 0
   */
   static diyfp normalize(diyfp x) noexcept {
     // Shift left until the top bit is set, compensating in the exponent.
     for (; (x.f >> 63u) == 0; x.e--) {
       x.f <<= 1u;
     }
     return x;
   }
   /*!
   @brief normalize x such that the result has the exponent E
   @pre e >= x.e and the upper e - x.e bits of x.f must be zero.
   */
   static diyfp normalize_to(const diyfp &x,
                             const int target_exponent) noexcept {
     const int shift = x.e - target_exponent;
     return diyfp(x.f << shift, target_exponent);
   }
 };
 // Result of compute_boundaries(): a value and its two rounding boundaries.
 struct boundaries {
   diyfp w;     // the value itself, normalized
   diyfp minus; // lower boundary m-, brought to the exponent of plus
   diyfp plus;  // upper boundary m+, normalized
 };
 /*!
 Compute the (normalized) diyfp representing the input number 'value' and its
 boundaries.

 @tparam FloatType an IEEE-754 floating-point type (checked by static_assert)
 @param value the number to decompose
 @return the normalized value together with its lower boundary m- and upper
         boundary m+; m+ is normalized and m- is shifted to the exponent of m+
 @pre value must be finite and positive
 */
 template <typename FloatType> boundaries compute_boundaries(FloatType value) {
   // Convert the IEEE representation into a diyfp.
   //
   // If v is denormal:
   //      value = 0.F * 2^(1 - bias) = (          F) * 2^(1 - bias - (p-1))
   // If v is normalized:
   //      value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1))
   static_assert(std::numeric_limits<FloatType>::is_iec559,
                 "internal error: dtoa_short requires an IEEE-754 "
                 "floating-point implementation");
   constexpr int kPrecision =
       std::numeric_limits<FloatType>::digits; // = p (includes the hidden bit)
   constexpr int kBias =
       std::numeric_limits<FloatType>::max_exponent - 1 + (kPrecision - 1);
   constexpr int kMinExp = 1 - kBias;
   constexpr std::uint64_t kHiddenBit = std::uint64_t{1}
                                        << (kPrecision - 1); // = 2^(p-1)
   // Pick an unsigned integer of the same width as FloatType (32 bits when
   // p == 24, otherwise 64 bits).
   using bits_type = typename std::conditional<kPrecision == 24, std::uint32_t,
                                               std::uint64_t>::type;
   const std::uint64_t bits = reinterpret_bits<bits_type>(value);
   // E also contains the sign bit; the precondition (positive value) keeps it
   // zero.
   const std::uint64_t E = bits >> (kPrecision - 1);
   const std::uint64_t F = bits & (kHiddenBit - 1);
   const bool is_denormal = E == 0;
   const diyfp v = is_denormal
                       ? diyfp(F, kMinExp)
                       : diyfp(F + kHiddenBit, static_cast<int>(E) - kBias);
   // Compute the boundaries m- and m+ of the floating-point value
   // v = f * 2^e.
   //
   // Determine v- and v+, the floating-point predecessor and successor of v,
   // respectively.
   //
   //      v- = v - 2^e        if f != 2^(p-1) or e == e_min                (A)
   //         = v - 2^(e-1)    if f == 2^(p-1) and e > e_min                (B)
   //
   //      v+ = v + 2^e
   //
   // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_
   // between m- and m+ round to v, regardless of how the input rounding
   // algorithm breaks ties.
   //
   //      ---+-------------+-------------+-------------+-------------+---  (A)
   //         v-            m-            v             m+            v+
   //
   //      -----------------+------+------+-------------+-------------+---  (B)
   //                       v-     m-     v             m+            v+
   const bool lower_boundary_is_closer = F == 0 && E > 1;
   const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1);
   const diyfp m_minus = lower_boundary_is_closer
                             ? diyfp(4 * v.f - 1, v.e - 2)  // (B)
                             : diyfp(2 * v.f - 1, v.e - 1); // (A)
   // Determine the normalized w+ = m+.
   const diyfp w_plus = diyfp::normalize(m_plus);
   // Determine w- = m- such that e_(w-) = e_(w+).
   const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e);
   return {diyfp::normalize(v), w_minus, w_plus};
 }
 // Given normalized diyfp w, Grisu needs to find a (normalized) cached
 // power-of-ten c, such that the exponent of the product c * w = f * 2^e lies
 // within a certain range [alpha, gamma] (Definition 3.2 from [1])
 //
 //      alpha <= e = e_c + e_w + q <= gamma
 //
 // or
 //
 //      f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q
 //                          <= f_c * f_w * 2^gamma
 //
 // Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies
 //
 //      2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma
 //
 // or
 //
 //      2^(q - 2 + alpha) <= c * w < 2^(q + gamma)
 //
 // The choice of (alpha,gamma) determines the size of the table and the form of
 // the digit generation procedure. Using (alpha,gamma)=(-60,-32) works out well
 // in practice:
 //
 // The idea is to cut the number c * w = f * 2^e into two parts, which can be
 // processed independently: An integral part p1, and a fractional part p2:
 //
 //      f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e
 //              = (f div 2^-e) + (f mod 2^-e) * 2^e
 //              = p1 + p2 * 2^e
 //
 // The conversion of p1 into decimal form requires a series of divisions and
 // modulos by (a power of) 10. These operations are faster for 32-bit than for
 // 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be
 // achieved by choosing
 //
 //      -e >= 32   or   e <= -32 := gamma
 //
 // In order to convert the fractional part
 //
 //      p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ...
 //
 // into decimal form, the fraction is repeatedly multiplied by 10 and the digits
 // d[-i] are extracted in order:
 //
 //      (10 * p2) div 2^-e = d[-1]
 //      (10 * p2) mod 2^-e = d[-2] / 10^1 + ...
 //
 // The multiplication by 10 must not overflow. It is sufficient to choose
 //
 //      10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64.
 //
 // Since p2 = f mod 2^-e < 2^-e,
 //
 //      -e <= 60   or   e >= -60 := alpha
 constexpr int kAlpha = -60; // smallest allowed binary exponent of the product c * w
 constexpr int kGamma = -32; // largest allowed binary exponent of the product c * w
 // One entry of the cached powers-of-ten table. Member order matters: the
 // table entries are brace-initialized as {f, e, k}.
 struct cached_power // c = f * 2^e ~= 10^k
 {
   std::uint64_t f; // significand of the approximation
   int e;           // binary exponent of the approximation
   int k;           // decimal exponent: the entry approximates 10^k
 };
 /*!
 For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached
 power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c
 satisfies (Definition 3.2 from [1])
      alpha <= e_c + e + q <= gamma.

 @param e binary exponent of the normalized diyfp
 @return the selected table entry (significand, binary exponent, decimal
         exponent)

 The table index derived from e is not range-checked here; the table covers
 the exponent range discussed in the comments below.
 */
 inline cached_power get_cached_power_for_binary_exponent(int e) {
   // Now
   //
   //      alpha <= e_c + e + q <= gamma                                    (1)
   //      ==> f_c * 2^alpha <= c * 2^e * 2^q
   //
   // and since the c's are normalized, 2^(q-1) <= f_c,
   //
   //      ==> 2^(q - 1 + alpha) <= c * 2^(e + q)
   //      ==> 2^(alpha - e - 1) <= c
   //
   // If c were an exact power of ten, i.e. c = 10^k, one may determine k as
   //
   //      k = ceil( log_10( 2^(alpha - e - 1) ) )
   //        = ceil( (alpha - e - 1) * log_10(2) )
   //
   // From the paper:
   // "In theory the result of the procedure could be wrong since c is rounded,
   //  and the computation itself is approximated [...]. In practice, however,
   //  this simple function is sufficient."
   //
   // For IEEE double precision floating-point numbers converted into
   // normalized diyfp's w = f * 2^e, with q = 64,
   //
   //      e >= -1022      (min IEEE exponent)
   //           -52        (p - 1)
   //           -52        (p - 1, possibly normalize denormal IEEE numbers)
   //           -11        (normalize the diyfp)
   //         = -1137
   //
   // and
   //
   //      e <= +1023      (max IEEE exponent)
   //           -52        (p - 1)
   //           -11        (normalize the diyfp)
   //         = 960
   //
   // This binary exponent range [-1137,960] results in a decimal exponent
   // range [-307,324]. One does not need to store a cached power for each
   // k in this range. For each such k it suffices to find a cached power
   // such that the exponent of the product lies in [alpha,gamma].
   // This implies that the difference of the decimal exponents of adjacent
   // table entries must be less than or equal to
   //
   //      floor( (gamma - alpha) * log_10(2) ) = 8.
   //
   // (A smaller distance gamma-alpha would require a larger table.)
   // NB:
   // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34.
   constexpr int kCachedPowersMinDecExp = -300;
   constexpr int kCachedPowersDecStep = 8;
   // Entries are {f, e, k} with f * 2^e ~= 10^k, for k = -300, -292, ..., 324.
   static constexpr std::array<cached_power, 79> kCachedPowers = {{
       {0xAB70FE17C79AC6CA, -1060, -300}, {0xFF77B1FCBEBCDC4F, -1034, -292},
       {0xBE5691EF416BD60C, -1007, -284}, {0x8DD01FAD907FFC3C, -980, -276},
       {0xD3515C2831559A83, -954, -268},  {0x9D71AC8FADA6C9B5, -927, -260},
       {0xEA9C227723EE8BCB, -901, -252},  {0xAECC49914078536D, -874, -244},
       {0x823C12795DB6CE57, -847, -236},  {0xC21094364DFB5637, -821, -228},
       {0x9096EA6F3848984F, -794, -220},  {0xD77485CB25823AC7, -768, -212},
       {0xA086CFCD97BF97F4, -741, -204},  {0xEF340A98172AACE5, -715, -196},
       {0xB23867FB2A35B28E, -688, -188},  {0x84C8D4DFD2C63F3B, -661, -180},
       {0xC5DD44271AD3CDBA, -635, -172},  {0x936B9FCEBB25C996, -608, -164},
       {0xDBAC6C247D62A584, -582, -156},  {0xA3AB66580D5FDAF6, -555, -148},
       {0xF3E2F893DEC3F126, -529, -140},  {0xB5B5ADA8AAFF80B8, -502, -132},
       {0x87625F056C7C4A8B, -475, -124},  {0xC9BCFF6034C13053, -449, -116},
       {0x964E858C91BA2655, -422, -108},  {0xDFF9772470297EBD, -396, -100},
       {0xA6DFBD9FB8E5B88F, -369, -92},   {0xF8A95FCF88747D94, -343, -84},
       {0xB94470938FA89BCF, -316, -76},   {0x8A08F0F8BF0F156B, -289, -68},
       {0xCDB02555653131B6, -263, -60},   {0x993FE2C6D07B7FAC, -236, -52},
       {0xE45C10C42A2B3B06, -210, -44},   {0xAA242499697392D3, -183, -36},
       {0xFD87B5F28300CA0E, -157, -28},   {0xBCE5086492111AEB, -130, -20},
       {0x8CBCCC096F5088CC, -103, -12},   {0xD1B71758E219652C, -77, -4},
       {0x9C40000000000000, -50, 4},      {0xE8D4A51000000000, -24, 12},
       {0xAD78EBC5AC620000, 3, 20},       {0x813F3978F8940984, 30, 28},
       {0xC097CE7BC90715B3, 56, 36},      {0x8F7E32CE7BEA5C70, 83, 44},
       {0xD5D238A4ABE98068, 109, 52},     {0x9F4F2726179A2245, 136, 60},
       {0xED63A231D4C4FB27, 162, 68},     {0xB0DE65388CC8ADA8, 189, 76},
       {0x83C7088E1AAB65DB, 216, 84},     {0xC45D1DF942711D9A, 242, 92},
       {0x924D692CA61BE758, 269, 100},    {0xDA01EE641A708DEA, 295, 108},
       {0xA26DA3999AEF774A, 322, 116},    {0xF209787BB47D6B85, 348, 124},
       {0xB454E4A179DD1877, 375, 132},    {0x865B86925B9BC5C2, 402, 140},
       {0xC83553C5C8965D3D, 428, 148},    {0x952AB45CFA97A0B3, 455, 156},
       {0xDE469FBD99A05FE3, 481, 164},    {0xA59BC234DB398C25, 508, 172},
       {0xF6C69A72A3989F5C, 534, 180},    {0xB7DCBF5354E9BECE, 561, 188},
       {0x88FCF317F22241E2, 588, 196},    {0xCC20CE9BD35C78A5, 614, 204},
       {0x98165AF37B2153DF, 641, 212},    {0xE2A0B5DC971F303A, 667, 220},
       {0xA8D9D1535CE3B396, 694, 228},    {0xFB9B7CD9A4A7443C, 720, 236},
       {0xBB764C4CA7A44410, 747, 244},    {0x8BAB8EEFB6409C1A, 774, 252},
       {0xD01FEF10A657842C, 800, 260},    {0x9B10A4E5E9913129, 827, 268},
       {0xE7109BFBA19C0C9D, 853, 276},    {0xAC2820D9623BF429, 880, 284},
       {0x80444B5E7AA7CF85, 907, 292},    {0xBF21E44003ACDD2D, 933, 300},
       {0x8E679C2F5E44FF8F, 960, 308},    {0xD433179D9C8CB841, 986, 316},
       {0x9E19DB92B4E31BA9, 1013, 324},
   }};
   // This computation gives exactly the same results for k as
   //      k = ceil((kAlpha - e - 1) * 0.30102999566398114)
   // for |e| <= 1500, but doesn't require floating-point operations.
   // NB: log_10(2) ~= 78913 / 2^18
   const int f = kAlpha - e - 1;
   const int k = (f * 78913) / (1 << 18) + static_cast<int>(f > 0);
   // Round k up to the next table entry (entries are kCachedPowersDecStep
   // apart, starting at kCachedPowersMinDecExp).
   const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) /
                     kCachedPowersDecStep;
   const cached_power cached = kCachedPowers[static_cast<std::size_t>(index)];
   return cached;
 }
 /*!
 Finds the number of decimal digits of n and the matching power of ten.

 For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k.
 For n == 0, returns 1 and sets pow10 := 1.
 */
 inline int find_largest_pow10(const std::uint32_t n, std::uint32_t &pow10) {
   // Candidates from 10^9 down to 10^1, tried largest first; 10^0 is the
   // fallback once none of them fits.
   static constexpr std::uint32_t kPowersOfTen[] = {
       1000000000, 100000000, 10000000, 1000000, 100000,
       10000,      1000,      100,      10};
   int digit_count = 10;
   for (const std::uint32_t candidate : kPowersOfTen) {
     if (n >= candidate) {
       pow10 = candidate;
       return digit_count;
     }
     --digit_count;
   }
   pow10 = 1;
   return 1;
 }
 inline void grisu2_round(char *buf, int len, std::uint64_t dist,
                          std::uint64_t delta, std::uint64_t rest,
                          std::uint64_t ten_k) {
   //               <--------------------------- delta ---->
   //                                  <---- dist --------->
   // --------------[------------------+-------------------]--------------
   //               M-                 w                   M+
   //
   //                                  ten_k
   //                                <------>
   //                                       <---- rest ---->
   // --------------[------------------+----+--------------]--------------
   //                                  w    V
   //                                       = buf * 10^k
   //
   // ten_k is one unit in the last decimal place of the digits held in buf.
   // The last digit is lowered, one unit at a time, for as long as that moves
   // V closer to w without leaving the interval. The checks are performed in
   // this order so that none of the unsigned subtractions can wrap around.
   for (;;) {
     // V is already at or below w: nothing more to gain.
     if (rest >= dist) {
       break;
     }
     // Another step would leave the interval [M-, M+].
     if (delta - rest < ten_k) {
       break;
     }
     const std::uint64_t stepped = rest + ten_k;
     // The step would cross w and end up no closer to it than V is now.
     if (stepped >= dist && dist - rest <= stepped - dist) {
       break;
     }
     --buf[len - 1];
     rest = stepped;
   }
 }
 /*!
 Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+.
 M- and M+ must be normalized and share the same exponent -60 <= e <= -32.

 @param buffer output digits; written starting at index length, no terminator
               is added
 @param length in/out: number of digits in buffer, incremented per digit
 @param decimal_exponent in/out: adjusted by the position of the last
                         generated digit
 @param M_minus lower boundary
 @param w the value itself
 @param M_plus upper boundary
 */
 inline void grisu2_digit_gen(char *buffer, int &length, int &decimal_exponent,
                              diyfp M_minus, diyfp w, diyfp M_plus) {
   static_assert(kAlpha >= -60, "internal error");
   static_assert(kGamma <= -32, "internal error");
   // Generates the digits (and the exponent) of a decimal floating-point
   // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's
   // w, M- and M+ share the same exponent e, which satisfies alpha <= e <=
   // gamma.
   //
   //               <--------------------------- delta ---->
   //                                  <---- dist --------->
   // --------------[------------------+-------------------]--------------
   //               M-                 w                   M+
   //
   // Grisu2 generates the digits of M+ from left to right and stops as soon as
   // V is in [M-,M+].
   std::uint64_t delta =
       diyfp::sub(M_plus, M_minus)
           .f; // (significand of (M+ - M-), implicit exponent is e)
   std::uint64_t dist =
       diyfp::sub(M_plus, w)
           .f; // (significand of (M+ - w ), implicit exponent is e)
   // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0):
   //
   //      M+ = f * 2^e
   //         = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e
   //         = ((p1        ) * 2^-e + (p2        )) * 2^e
   //         = p1 + p2 * 2^e
   const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e);
   auto p1 = static_cast<std::uint32_t>(
       M_plus.f >>
       -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.)
   std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e
   // 1)
   //
   // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0]
   std::uint32_t pow10;
   const int k = find_largest_pow10(p1, pow10);
   //      10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1)
   //
   //      p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1))
   //         = (d[k-1]         ) * 10^(k-1) + (p1 mod 10^(k-1))
   //
   //      M+ = p1                                             + p2 * 2^e
   //         = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1))          + p2 * 2^e
   //         = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e
   //         = d[k-1] * 10^(k-1) + (                         rest) * 2^e
   //
   // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0)
   //
   //      p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0]
   //
   // but stop as soon as
   //
   //      rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e
   int n = k;
   while (n > 0) {
     // Invariants:
     //      M+ = buffer * 10^n + (p1 + p2 * 2^e)    (buffer = 0 for n = k)
     //      pow10 = 10^(n-1) <= p1 < 10^n
     //
     const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1)
     const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1)
     //
     //      M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e
     //         = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e)
     //
     buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d
     //
     //      M+ = buffer * 10^(n-1) + (r + p2 * 2^e)
     //
     p1 = r;
     n--;
     //
     //      M+ = buffer * 10^n + (p1 + p2 * 2^e)
     //      pow10 = 10^n
     //
     // Now check if enough digits have been generated.
     // Compute
     //
     //      p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e
     //
     // Note:
     // Since rest and delta share the same exponent e, it suffices to
     // compare the significands.
     const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2;
     if (rest <= delta) {
       // V = buffer * 10^n, with M- <= V <= M+.
       decimal_exponent += n;
       // We may now just stop. But instead look if the buffer could be
       // decremented to bring V closer to w.
       //
       // pow10 = 10^n is now 1 ulp in the decimal representation V.
       // The rounding procedure works with diyfp's with an implicit
       // exponent of e.
       //
       //      10^n = (10^n * 2^-e) * 2^e = ulp * 2^e
       //
       const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e;
       grisu2_round(buffer, length, dist, delta, rest, ten_n);
       return;
     }
     pow10 /= 10;
     //
     //      pow10 = 10^(n-1) <= p1 < 10^n
     // Invariants restored.
   }
   // 2)
   //
   // The digits of the integral part have been generated:
   //
   //      M+ = d[k-1]...d[1]d[0] + p2 * 2^e
   //         = buffer            + p2 * 2^e
   //
   // Now generate the digits of the fractional part p2 * 2^e.
   //
   // Note:
   // No decimal point is generated: the exponent is adjusted instead.
   //
   // p2 actually represents the fraction
   //
   //      p2 * 2^e
   //          = p2 / 2^-e
   //          = d[-1] / 10^1 + d[-2] / 10^2 + ...
   //
   // Now generate the digits d[-m] of p2 from left to right (m = 1,2,...)
   //
   //      p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m
   //                      + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...)
   //
   // using
   //
   //      10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e)
   //                = (                   d) * 2^-e + (                   r)
   //
   // or
   //      10^m * p2 * 2^e = d + r * 2^e
   //
   // i.e.
   //
   //      M+ = buffer + p2 * 2^e
   //         = buffer + 10^-m * (d + r * 2^e)
   //         = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e
   //
   // and stop as soon as 10^-m * r * 2^e <= delta * 2^e
   int m = 0;
   for (;;) {
     // Invariant:
     //      M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...) * 2^e
     //         = buffer * 10^-m + 10^-m * (p2                                 ) * 2^e
     //         = buffer * 10^-m + 10^-m * (1/10 * (10 * p2)                   ) * 2^e
     //         = buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + (10*p2 mod 2^-e)) * 2^e
     //
     p2 *= 10;
     const std::uint64_t d = p2 >> -one.e;     // d = (10 * p2) div 2^-e
     const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e
     //
     //      M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e
     //         = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e))
     //         = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e
     //
     buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d
     //
     //      M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e
     //
     p2 = r;
     m++;
     //
     //      M+ = buffer * 10^-m + 10^-m * p2 * 2^e
     // Invariant restored.
     // Check if enough digits have been generated.
     //
     //      10^-m * p2 * 2^e <= delta * 2^e
     //              p2 * 2^e <= 10^m * delta * 2^e
     //                    p2 <= 10^m * delta
     delta *= 10;
     dist *= 10;
     if (p2 <= delta) {
       break;
     }
   }
   // V = buffer * 10^-m, with M- <= V <= M+.
   decimal_exponent -= m;
   // 1 ulp in the decimal representation is now 10^-m.
   // Since delta and dist are now scaled by 10^m, we need to do the
   // same with ulp in order to keep the units in sync.
   //
   //      10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e
   //
   const std::uint64_t ten_m = one.f;
   grisu2_round(buffer, length, dist, delta, p2, ten_m);
   // By construction this algorithm generates the shortest possible decimal
   // number (Loitsch, Theorem 6.2) which rounds back to w.
   // For an input number of precision p, at least
   //
   //      N = 1 + ceil(p * log_10(2))
   //
   // decimal digits are sufficient to identify all binary floating-point
   // numbers (Matula, "In-and-Out conversions").
   // This implies that the algorithm does not produce more than N decimal
   // digits.
   //
   //      N = 17 for p = 53 (IEEE double precision)
   //      N = 9  for p = 24 (IEEE single precision)
 }
 /*!
 v = buf * 10^decimal_exponent
 len is the length of the buffer (number of decimal digits)
 The buffer must be large enough, i.e. >= max_digits10.
 */
 inline void grisu2(char *buf, int &len, int &decimal_exponent, diyfp m_minus,
                   diyfp v, diyfp m_plus) {
  //  --------(-----------------------+-----------------------)--------    (A)
  //          m-                      v                       m+
  //
  //  --------------------(-----------+-----------------------)--------    (B)
  //                      m-          v                       m+
  //
  // First scale v (and m- and m+) such that the exponent is in the range
  // [alpha, gamma].
  const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e);
  const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k
  // The exponent of the products is = v.e + c_minus_k.e + q and is in the range
  // [alpha,gamma]
  const diyfp w = diyfp::mul(v, c_minus_k);
  const diyfp w_minus = diyfp::mul(m_minus, c_minus_k);
  const diyfp w_plus = diyfp::mul(m_plus, c_minus_k);
  //  ----(---+---)---------------(---+---)---------------(---+---)----
  //          w-                      w                       w+
  //          = c*m-                  = c*v                   = c*m+
  //
  // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and
  // w+ are now off by a small amount.
  // In fact:
  //
  //      w - v * 10^k < 1 ulp
  //
  // To account for this inaccuracy, add resp. subtract 1 ulp.
  //
  //  --------+---[---------------(---+---)---------------]---+--------
  //          w-  M-                  w                   M+  w+
  //
  // Now any number in [M-, M+] (bounds included) will round to w when input,
  // regardless of how the input rounding algorithm breaks ties.
  //
  // And digit_gen generates the shortest possible such number in [M-, M+].
  // Note that this does not mean that Grisu2 always generates the shortest
  // possible number in the interval (m-, m+).
  const diyfp M_minus(w_minus.f + 1, w_minus.e);
  const diyfp M_plus(w_plus.f - 1, w_plus.e);
  decimal_exponent = -cached.k; // = -(-k) = k
  grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus);
 }
 /*!
 v = buf * 10^decimal_exponent
 len is the length of the buffer (number of decimal digits)
 The buffer must be large enough, i.e. >= max_digits10.
 */
 template <typename FloatType>
 void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) {
  static_assert(diyfp::kPrecision >= std::numeric_limits<FloatType>::digits + 3,
                "internal error: not enough precision");
  // If the neighbors (and boundaries) of 'value' are always computed for
  // double-precision numbers, all float's can be recovered using strtod (and
  // strtof). However, the resulting decimal representations are not exactly
  // "short".
  //
  // The documentation for 'std::to_chars'
  // (https://en.cppreference.com/w/cpp/utility/to_chars) says "value is
  // converted to a string as if by std::sprintf in the default ("C") locale"
  // and since sprintf promotes float's to double's, I think this is exactly
  // what 'std::to_chars' does. On the other hand, the documentation for
  // 'std::to_chars' requires that "parsing the representation using the
  // corresponding std::from_chars function recovers value exactly". That
  // indicates that single precision floating-point numbers should be recovered
  // using 'std::strtof'.
  //
  // NB: If the neighbors are computed for single-precision numbers, there is a
  // single float
  //     (7.0385307e-26f) which can't be recovered using strtod. The resulting
  //     double precision value is off by 1 ulp.
 #if 0
    const boundaries w = compute_boundaries(static_cast<double>(value));
 #else
  const boundaries w = compute_boundaries(value);
 #endif
  grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus);
 }
 /*!
@brief writes the sign and the decimal digits of the exponent e to buf
 A sign character ('+' or '-') is always written, followed by at least two
 digits (for compatibility with printf("%g")) and at most three.
@return a pointer to the element following the exponent.
@pre -1000 < e < 1000
 */
 inline char *append_exponent(char *buf, int e) {
  const bool negative = e < 0;
  *buf++ = negative ? '-' : '+';
  std::uint32_t rest = static_cast<std::uint32_t>(negative ? -e : e);
  // The hundreds digit is only printed when it is needed.
  if (rest >= 100) {
    *buf++ = static_cast<char>('0' + rest / 100);
    rest %= 100;
  }
  // Tens and ones are always printed, so single-digit exponents get a
  // leading zero.
  *buf++ = static_cast<char>('0' + rest / 10);
  *buf++ = static_cast<char>('0' + rest % 10);
  return buf;
 }
 /*!
@brief turns the raw digits in buf into a readable number, in place
 On entry buf holds len decimal digits and v = buf * 10^decimal_exponent.
 If v is in the range [10^min_exp, 10^max_exp) it is written in fixed-point
 notation, otherwise in exponential notation.
@return a pointer to the element following the formatted number.
@pre min_exp < 0
@pre max_exp > 0
 */
 inline char *format_buffer(char *buf, int len, int decimal_exponent,
                           int min_exp, int max_exp) {
  // digits: how many decimal digits are stored in buf.
  // point:  position of the decimal point relative to the start of buf,
  //         i.e. v = buf * 10^(point - digits).
  const int digits = len;
  const int point = len + decimal_exponent;
  const bool below_max = point <= max_exp;

  if (below_max && digits <= point) {
    // Integer value: digits[000].0
    // Output length <= max_exp + 2.
    const size_t padding =
        static_cast<size_t>(point) - static_cast<size_t>(digits);
    std::memset(buf + digits, '0', padding);
    // A trailing ".0" makes the result look like a floating-point number.
    char *tail = buf + point;
    tail[0] = '.';
    tail[1] = '0';
    return buf + (static_cast<size_t>(point) + 2);
  }

  if (below_max && 0 < point) {
    // Decimal point falls inside the digits: dig.its
    // Output length <= max_digits10 + 1.
    const size_t fraction =
        static_cast<size_t>(digits) - static_cast<size_t>(point);
    // Shift the fractional digits right by one to make room for the point.
    std::memmove(buf + (static_cast<size_t>(point) + 1), buf + point,
                 fraction);
    buf[point] = '.';
    return buf + (static_cast<size_t>(digits) + 1U);
  }

  if (min_exp < point && point <= 0) {
    // Small value: 0.[000]digits
    // Output length <= 2 + (-min_exp - 1) + max_digits10.
    const size_t leading_zeros = static_cast<size_t>(-point);
    const size_t count = static_cast<size_t>(digits);
    // Move the digits out of the way first, then write the "0.000" prefix.
    std::memmove(buf + (2 + leading_zeros), buf, count);
    buf[0] = '0';
    buf[1] = '.';
    std::memset(buf + 2, '0', leading_zeros);
    return buf + (2U + leading_zeros + count);
  }

  // Everything else is written in exponential notation.
  if (digits != 1) {
    // d.igitsE+123
    // Output length <= max_digits10 + 1 + 5.
    std::memmove(buf + 2, buf + 1, static_cast<size_t>(digits) - 1);
    buf[1] = '.';
    buf += 1 + static_cast<size_t>(digits);
  } else {
    // dE+123
    // Output length <= 1 + 5.
    buf += 1;
  }
  *buf++ = 'e';
  return append_exponent(buf, point - 1);
 }
 } // namespace dtoa_impl
 /*!
 Writes a decimal representation of value starting at first. The format is
 similar to printf's %g format. Returns a pointer past the end of the
 written characters.
@note The input number must be finite, i.e. NaN's and Inf's are not supported.
@note The buffer must be large enough; last is not consulted.
@note The result is NOT null-terminated.
 */
 char *to_chars(char *first, const char *last, double value) {
  // Values in [10^kMinExp, 10^kMaxExp) are printed in fixed-point notation,
  // similar to printf("%.*g", prec, value).
  constexpr int kMinExp = -4;
  constexpr int kMaxExp = std::numeric_limits<double>::digits10;
  static_cast<void>(last); // maybe unused - fix warning

  char *out = first;
  // signbit(value) is used instead of (value < 0) because it also detects -0.
  if (std::signbit(value)) {
    *out++ = '-';
    value = -value;
  }
  if (value == 0) {
    // +-0 is written as "0.0" so that it looks like a floating-point number.
    out[0] = '0';
    out[1] = '.';
    out[2] = '0';
    return out + 3;
  }
  // Compute value = digits * 10^exponent10. The digits are stored at out and
  // are to be read as an unsigned decimal integer of digit_count digits.
  int digit_count = 0;
  int exponent10 = 0;
  dtoa_impl::grisu2(out, digit_count, exponent10, value);
  // Insert the decimal point / exponent in place.
  return dtoa_impl::format_buffer(out, digit_count, exponent10, kMinExp,
                                  kMaxExp);
 }
 } // namespace internal
 } // namespace simdjson
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -52,6 +52,7 @@ target_compile_definitions(stringparsingcheck PRIVATE NOMINMAX)
 # All remaining tests link with simdjson proper
 link_libraries(simdjson)
 add_cpp_test(basictests LABELS acceptance per_implementation)
 add_cpp_test(minify_tests LABELS acceptance per_implementation)
 add_cpp_test(document_stream_tests LABELS acceptance per_implementation)
 add_cpp_test(document_tests LABELS acceptance per_implementation)
 add_cpp_test(errortests LABELS acceptance per_implementation)
--- a/tests/basictests.cpp
+++ b/tests/basictests.cpp
@ -1365,8 +1365,8 @@ namespace minify_tests {
  bool test_minify() {
    // Passes a whitespace-padded JSON document and the compact text it is
    // expected to minify to, to check_minification and returns its verdict.
    std::cout << "Running " << __func__ << std::endl;
-    const std::string test = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })";
+    const std::string test = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })";
-    const std::string minified(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
+    const std::string minified(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
    return check_minification(test.c_str(), test.size(), minified.c_str(), minified.size());
  }
  bool test_minify_array() {
@ -1394,8 +1394,8 @@ namespace format_tests {
  using namespace simdjson;
  using namespace simdjson::dom;
  using namespace std;
-  const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 3 ], "baz": { "a": 1, "b": 2, "c": 3 } })"_padded;
+  const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })"_padded;
-  const string MINIFIED(R"({"foo":1,"bar":[1,2,3],"baz":{"a":1,"b":2,"c":3}})");
+  const string MINIFIED(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
  bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
    if (actual.str() != expected) {
      cerr << "Failed to correctly minify " << DOCUMENT << endl;
@ -1451,7 +1451,7 @@ namespace format_tests {
    ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
    ostringstream s;
    s << array;
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_minify_array() {
    std::cout << "Running " << __func__ << std::endl;
@ -1460,7 +1460,7 @@ namespace format_tests {
    ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
    ostringstream s;
    s << minify(array);
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_object() {
@ -1470,7 +1470,7 @@ namespace format_tests {
    ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
    ostringstream s;
    s << object;
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
  bool print_minify_object() {
    std::cout << "Running " << __func__ << std::endl;
@ -1479,7 +1479,7 @@ namespace format_tests {
    ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
    ostringstream s;
    s << minify(object);
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
 #if SIMDJSON_EXCEPTIONS
@ -1536,14 +1536,14 @@ namespace format_tests {
    dom::parser parser;
    ostringstream s;
    s << parser.parse(DOCUMENT)["bar"].get<dom::array>();
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_minify_array_result_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    ostringstream s;
    s << minify(parser.parse(DOCUMENT)["bar"].get<dom::array>());
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_object_result_exception() {
@ -1551,14 +1551,14 @@ namespace format_tests {
    dom::parser parser;
    ostringstream s;
    s << parser.parse(DOCUMENT)["baz"].get<dom::object>();
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
  bool print_minify_object_result_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    ostringstream s;
    s << minify(parser.parse(DOCUMENT)["baz"].get<dom::object>());
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
  bool print_array_exception() {
@ -1567,7 +1567,7 @@ namespace format_tests {
    dom::array array = parser.parse(DOCUMENT)["bar"];
    ostringstream s;
    s << array;
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_minify_array_exception() {
    std::cout << "Running " << __func__ << std::endl;
@ -1575,7 +1575,7 @@ namespace format_tests {
    dom::array array = parser.parse(DOCUMENT)["bar"];
    ostringstream s;
    s << minify(array);
-    return assert_minified(s, "[1,2,3]");
+    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_object_exception() {
@ -1584,7 +1584,7 @@ namespace format_tests {
    dom::object object = parser.parse(DOCUMENT)["baz"];
    ostringstream s;
    s << object;
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
  bool print_minify_object_exception() {
    std::cout << "Running " << __func__ << std::endl;
@ -1592,7 +1592,7 @@ namespace format_tests {
    dom::object object = parser.parse(DOCUMENT)["baz"];
    ostringstream s;
    s << minify(object);
-    return assert_minified(s, R"({"a":1,"b":2,"c":3})");
+    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
 #endif // SIMDJSON_EXCEPTIONS
@ -1615,6 +1615,149 @@ namespace format_tests {
 }
 namespace to_string_tests {
  using namespace simdjson;
  using namespace simdjson::dom;
  using namespace std;
  const padded_string DOCUMENT = R"({ "foo" : 1, "bar" : [ 1, 2, 0.11111111111111113 ], "baz": { "a": 3.1415926535897936, "b": 2, "c": 3.141592653589794 } })"_padded;
  const string MINIFIED(R"({"foo":1,"bar":[1,2,0.11111111111111113],"baz":{"a":3.1415926535897936,"b":2,"c":3.141592653589794}})");
  // Compares the text accumulated in `actual` with `expected` (by default the
  // minified form of the whole DOCUMENT). On mismatch, both strings are
  // reported on cerr and false is returned.
  bool assert_minified(ostringstream &actual, const std::string &expected=MINIFIED) {
    const std::string produced = actual.str();
    if (produced == expected) {
      return true;
    }
    cerr << "Failed to correctly to_string " << DOCUMENT << endl;
    cerr << "Expected: " << expected << endl;
    cerr << "Actual:   " << produced << endl;
    return false;
  }
  bool print_to_string_parser_parse() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    dom::element doc;
    ASSERT_SUCCESS( parser.parse(DOCUMENT).get(doc) );
    ostringstream s;
    s << to_string(doc);
    return assert_minified(s);
  }
  bool print_to_string_element() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    dom::element value;
    ASSERT_SUCCESS( parser.parse(DOCUMENT)["foo"].get(value) );
    ostringstream s;
    s << to_string(value);
    return assert_minified(s, "1");
  }
  bool print_to_string_array() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    dom::array array;
    ASSERT_SUCCESS( parser.parse(DOCUMENT)["bar"].get(array) );
    ostringstream s;
    s << to_string(array);
    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_to_string_object() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    dom::object object;
    ASSERT_SUCCESS( parser.parse(DOCUMENT)["baz"].get(object) );
    ostringstream s;
    s << to_string(object);
    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
 #if SIMDJSON_EXCEPTIONS
  bool print_to_string_parser_parse_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    ostringstream s;
    s << to_string(parser.parse(DOCUMENT));
    return assert_minified(s);
  }
  bool print_to_string_element_result_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    ostringstream s;
    s << to_string(parser.parse(DOCUMENT)["foo"]);
    return assert_minified(s, "1");
  }
  bool print_to_string_element_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    element value = parser.parse(DOCUMENT)["foo"];
    ostringstream s;
    s << to_string(value);
    return assert_minified(s, "1");
  }
  bool print_to_string_array_result_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    ostringstream s;
    s << to_string(parser.parse(DOCUMENT)["bar"].get<dom::array>());
    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_to_string_object_result_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    ostringstream s;
    s << to_string(parser.parse(DOCUMENT)["baz"].get<dom::object>());
    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
  bool print_to_string_array_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    dom::array array = parser.parse(DOCUMENT)["bar"];
    ostringstream s;
    s << to_string(array);
    return assert_minified(s, "[1,2,0.11111111111111113]");
  }
  bool print_to_string_object_exception() {
    std::cout << "Running " << __func__ << std::endl;
    dom::parser parser;
    dom::object object = parser.parse(DOCUMENT)["baz"];
    ostringstream s;
    s << to_string(object);
    return assert_minified(s, R"({"a":3.1415926535897936,"b":2,"c":3.141592653589794})");
  }
 #endif // SIMDJSON_EXCEPTIONS
  // Runs every to_string test in order and stops at the first failure.
  // The tests that skip explicit error checks are only included when
  // SIMDJSON_EXCEPTIONS is enabled.
  bool run() {
    if (!print_to_string_parser_parse()) { return false; }
    if (!print_to_string_element()) { return false; }
    if (!print_to_string_array()) { return false; }
    if (!print_to_string_object()) { return false; }
 #if SIMDJSON_EXCEPTIONS
    if (!print_to_string_parser_parse_exception()) { return false; }
    if (!print_to_string_element_result_exception()) { return false; }
    if (!print_to_string_array_result_exception()) { return false; }
    if (!print_to_string_object_result_exception()) { return false; }
    if (!print_to_string_element_exception()) { return false; }
    if (!print_to_string_array_exception()) { return false; }
    if (!print_to_string_object_exception()) { return false; }
 #endif
    return true;
  }
 }
 int main(int argc, char *argv[]) {
  std::cout << std::unitbuf;
  int c;
@ -1646,7 +1789,8 @@ int main(int argc, char *argv[]) {
  std::cout << "------------------------------------------------------------" << std::endl;
  std::cout << "Running basic tests." << std::endl;
-  if (validate_tests::run() &&
+  if (to_string_tests::run() &&
      validate_tests::run() &&
      minify_tests::run() &&
      parse_api_tests::run() &&
      dom_api_tests::run() &&
--- a/tests/minify_tests.cpp
+++ b/tests/minify_tests.cpp
@ -0,0 +1,80 @@
 #include <cinttypes>
 #include <ciso646>
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
 #include <set>
 #include <sstream>
 #include <string>
 #include <unistd.h>
 #include <utility>
 #include <vector>
 #include "cast_tester.h"
 #include "simdjson.h"
 #include "test_macros.h"
 // JSON documents that minify_test() runs through the round-trip checks.
 // The path constants are not defined in this file; they come from one of
 // the headers included above.
 const char *test_files[] = {
    TWITTER_JSON, TWITTER_TIMELINE_JSON, REPEAT_JSON, CANADA_JSON,
    MESH_JSON,    APACHE_JSON,           GSOC_JSON};
 /**
 * The general idea of these tests is that if you take a JSON file,
 * load it, then convert it into a string, then parse that, and
 * convert it again into a second string, then the two strings should
 * be identical. If not, then something was lost or added in the
 * process.
 */
 // Round-trip check for simdjson::to_string.
 //
 // Loads `filename`, serializes the document with to_string, parses that
 // text again and serializes a second time. Returns true when both
 // serializations are equal. Every failure (load error, parse error or
 // mismatch) is reported on std::cerr before returning false.
 bool load_to_string(const char *filename) {
  std::cout << "Loading " << filename << std::endl;
  simdjson::dom::parser parser;
  simdjson::dom::element doc;
  auto error = parser.load(filename).get(doc);
  if (error) { std::cerr << error << std::endl; return false; }
  auto serial1 = simdjson::to_string(doc);
  // Parse our own output; `doc` now refers to the re-parsed document.
  error = parser.parse(serial1).get(doc);
  if (error) { std::cerr << error << std::endl; return false; }
  auto serial2 = simdjson::to_string(doc);
  if (serial1 != serial2) {
    // Without this message a mismatch would fail the test silently.
    std::cerr << "Parsing to_string and calling to_string again results in "
                 "different content for "
              << filename << std::endl;
    return false;
  }
  std::cout << "Parsing to_string and calling to_string again results in the "
               "same content."
            << std::endl;
  return true;
 }
 // Round-trip check for simdjson::minify.
 //
 // Loads `filename`, serializes the document with minify, parses that text
 // again and serializes a second time. Returns true when both serializations
 // are equal. Every failure (load error, parse error or mismatch) is reported
 // on std::cerr before returning false.
 bool load_minify(const char *filename) {
  std::cout << "Loading " << filename << std::endl;
  simdjson::dom::parser parser;
  simdjson::dom::element doc;
  auto error = parser.load(filename).get(doc);
  if (error) { std::cerr << error << std::endl; return false; }
  auto serial1 = simdjson::minify(doc);
  // Parse our own output; `doc` now refers to the re-parsed document.
  error = parser.parse(serial1).get(doc);
  if (error) { std::cerr << error << std::endl; return false; }
  auto serial2 = simdjson::minify(doc);
  if (serial1 != serial2) {
    // Without this message a mismatch would fail the test silently.
    std::cerr << "Parsing minify and calling minify again results in "
                 "different content for "
              << filename << std::endl;
    return false;
  }
  std::cout << "Parsing minify and calling minify again results in the same "
               "content."
            << std::endl;
  return true;
 }
 // Runs both round-trip checks (to_string, then minify) on every entry of
 // test_files and stops at the first file for which either check fails.
 bool minify_test() {
  std::cout << "Running " << __func__ << std::endl;
  for (const char *file : test_files) {
    if (!load_to_string(file)) {
      return false;
    }
    if (!load_minify(file)) {
      return false;
    }
  }
  return true;
 }
 // Process exit status reflects the outcome of minify_test().
 int main() {
  if (minify_test()) {
    return EXIT_SUCCESS;
  }
  return EXIT_FAILURE;
 }
--- a/tests/test_macros.h
+++ b/tests/test_macros.h
@ -7,6 +7,11 @@
 const char *TWITTER_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter.json";
 const char *TWITTER_TIMELINE_JSON = SIMDJSON_BENCHMARK_DATA_DIR "twitter_timeline.json";
 const char *REPEAT_JSON = SIMDJSON_BENCHMARK_DATA_DIR "repeat.json";
 const char *CANADA_JSON = SIMDJSON_BENCHMARK_DATA_DIR "canada.json";
 const char *MESH_JSON = SIMDJSON_BENCHMARK_DATA_DIR "mesh.json";
 const char *APACHE_JSON = SIMDJSON_BENCHMARK_DATA_DIR "apache_builds.json";
 const char *GSOC_JSON = SIMDJSON_BENCHMARK_DATA_DIR "gsoc-2018.json";
 const char *AMAZON_CELLPHONES_NDJSON = SIMDJSON_BENCHMARK_DATA_DIR "amazon_cellphones.ndjson";
 #define SIMDJSON_BENCHMARK_SMALLDATA_DIR SIMDJSON_BENCHMARK_DATA_DIR "small/"